# Source code for titanicprediction.core.algorithms

from dataclasses import dataclass
from typing import Any

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

FloatArray = np.ndarray



# [docs]
@dataclass
class GradientDescentResult:
    """
    Container for the outcome of a gradient-based training run.

    Instances are plain records: the optimizer routines fill in the four
    fields below and hand the object back to the caller.

    Attributes:
        weights: Weight vector as it stands when optimization stops.
        bias: Scalar bias term as it stands when optimization stops.
        loss_history: One loss value per completed epoch, in order.
        convergence_info: Free-form metadata describing the run.

    """

    # Learned parameters.
    weights: FloatArray
    bias: float

    # Training trace and run metadata.
    loss_history: list[float]
    convergence_info: dict[str, Any]




# [docs]
def sigmoid(z: FloatArray) -> FloatArray:
    """
    Apply the logistic function element-wise.

    The input is first limited to the range [-50, 50] so that the
    exponential below cannot overflow for very large magnitudes.

    Args:
        z: Input array of pre-activation values.

    Returns:
        Array of the same shape holding ``1 / (1 + exp(-z))`` evaluated on
        the clipped input.

    """
    bounded = np.clip(z, -50, 50)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator




# [docs]
def add_polynomial_features(x: FloatArray, degree: int = 3) -> FloatArray:
    """
    Expand the input matrix with polynomial feature terms.

    A new ``PolynomialFeatures`` transformer is created and fitted on ``x``
    on every call, so nothing is remembered between calls. The transformer
    is built with ``include_bias=False``, i.e. without the constant bias
    column.

    Args:
        x: Input feature matrix.
        degree: Degree of polynomial features. Defaults to 3.

    Returns:
        Transformed feature matrix with polynomial features.

    """
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    return poly.fit_transform(x)




# [docs]
def binary_cross_entropy(
    y_true: FloatArray,
    y_pred: FloatArray,
    weights: FloatArray,
    lambda_reg: float = 0.01,
    epsilon: float = 1e-12,
) -> float:
    """
    Compute the regularized binary cross-entropy loss.

    When both classes occur in ``y_true`` the per-sample losses are combined
    as a weighted average in which each class is weighted inversely to its
    frequency. When only one class is present the plain mean is used. An L2
    penalty on ``weights`` is added in both cases.

    Args:
        y_true: Ground truth labels (samples equal to 1 are the positive
            class).
        y_pred: Predicted probabilities.
        weights: Model weights used for the L2 penalty.
        lambda_reg: Regularization parameter. Defaults to 0.01.
        epsilon: Predictions are clipped to ``[epsilon, 1 - epsilon]`` so
            the logarithms stay finite. Defaults to 1e-12.

    Returns:
        Data loss plus the L2 penalty, as a Python float.

    """
    probs = np.clip(y_pred, epsilon, 1 - epsilon)

    sample_count = len(y_true)
    positives = np.sum(y_true)
    negatives = sample_count - positives

    # Per-sample negative log-likelihood, shared by both branches below.
    per_sample = -(y_true * np.log(probs) + (1 - y_true) * np.log(1 - probs))

    if positives > 0 and negatives > 0:
        # Inverse-frequency class weights; normalised by their own sum.
        balance = np.where(
            y_true == 1,
            sample_count / (2 * positives),
            sample_count / (2 * negatives),
        )
        data_loss = np.sum(balance * per_sample) / np.sum(balance)
    else:
        data_loss = np.mean(per_sample)

    penalty = (lambda_reg / (2 * sample_count)) * np.sum(weights**2)

    return float(data_loss + penalty)




# [docs]
def compute_gradients(
    x: FloatArray,
    y_true: FloatArray,
    y_pred: FloatArray,
    weights: FloatArray,
    lambda_reg: float = 0.01,
    class_weight: dict | None = None,
) -> tuple[FloatArray, float]:
    """
    Compute gradients for logistic regression.

    Args:
        x: Input feature matrix.
        y_true: Ground truth labels.
        y_pred: Predicted probabilities.
        weights: Model weights for regularization.
        lambda_reg: Regularization parameter. Defaults to 0.01.
        class_weight: Optional class weights dictionary.

    Returns:
        Tuple containing:
            - Weight gradients
            - Bias gradient

    """
    m = len(y_true)
    error = y_pred - y_true

    n_pos = np.sum(y_true)
    n_neg = m - n_pos

    if n_pos > 0 and n_neg > 0:
        weight_neg = n_pos / m
        weight_pos = n_neg / m

        sample_weights = np.where(y_true == 1, weight_pos, weight_neg)
        error = error * sample_weights

        dw = (1 / np.sum(sample_weights)) * np.dot(x.T, error)
        db = (1 / np.sum(sample_weights)) * np.sum(error)
    else:
        dw = (1 / m) * np.dot(x.T, error)
        db = (1 / m) * np.sum(error)

    dw_reg = dw + (lambda_reg / m) * weights

    return dw_reg, float(db)




# [docs]
def gradient_descent(
    x: FloatArray,
    y: FloatArray,
    learning_rate: float = 0.001,
    epochs: int = 1000,
    convergence_tol: float = 1e-6,
    beta1: float = 0.9,
    beta2: float = 0.999,
    epsilon: float = 1e-8,
    lambda_reg: float = 0.01,
) -> GradientDescentResult:
    """
    Perform gradient descent optimization with Adam optimizer.

    Weights are drawn from a normal distribution (mean 0, std 0.01) using
    NumPy's global random state, so results vary between calls unless the
    caller seeds ``np.random`` beforehand. The bias starts at 0.

    Each epoch records the loss for the current parameters and then applies
    one Adam update, so the last entry of ``loss_history`` was measured
    just before the final parameter update. Early stopping is only
    considered after more than 100 epochs.

    Args:
        x: Input feature matrix.
        y: Target labels.
        learning_rate: Learning rate for optimization. Defaults to 0.001.
        epochs: Maximum number of epochs. Defaults to 1000.
        convergence_tol: Training stops once the absolute change between
            two consecutive loss values drops below this. Defaults to 1e-6.
        beta1: Adam beta1 parameter. Defaults to 0.9.
        beta2: Adam beta2 parameter. Defaults to 0.999.
        epsilon: Adam epsilon parameter. Defaults to 1e-8.
        lambda_reg: Regularization parameter. Defaults to 0.01.

    Returns:
        GradientDescentResult containing optimization results. When no
        epoch is run (``epochs <= 0``) the loss history is empty and
        ``convergence_info["final_loss"]`` is ``float("inf")``.

    """
    n_features = x.shape[1]
    weights = np.random.normal(0, 0.01, n_features)
    bias = 0.0
    loss_history: list[float] = []

    # First (m_*) and second (v_*) moment estimates for weights and bias.
    m_dw = np.zeros_like(weights)
    m_db = 0.0
    v_dw = np.zeros_like(weights)
    v_db = 0.0

    # Epochs are 1-based because the bias correction uses beta**epoch.
    for epoch in range(1, epochs + 1):
        z = np.dot(x, weights) + bias
        predictions = sigmoid(z)

        loss = binary_cross_entropy(y, predictions, weights, lambda_reg)
        loss_history.append(loss)

        dw, db = compute_gradients(x, y, predictions, weights, lambda_reg)

        m_dw = beta1 * m_dw + (1 - beta1) * dw
        m_db = beta1 * m_db + (1 - beta1) * db
        v_dw = beta2 * v_dw + (1 - beta2) * (dw**2)
        v_db = beta2 * v_db + (1 - beta2) * (db**2)

        # Floor the decayed powers at 1e-12 so they never reach zero.
        beta1_power = max(beta1**epoch, 1e-12)
        beta2_power = max(beta2**epoch, 1e-12)

        m_dw_corrected = m_dw / (1 - beta1_power)
        m_db_corrected = m_db / (1 - beta1_power)
        v_dw_corrected = v_dw / (1 - beta2_power)
        v_db_corrected = v_db / (1 - beta2_power)

        weights -= learning_rate * (
            m_dw_corrected / (np.sqrt(v_dw_corrected) + epsilon)
        )
        bias -= learning_rate * (m_db_corrected / (np.sqrt(v_db_corrected) + epsilon))

        # epoch > 100 guarantees at least two recorded losses to compare.
        if epoch > 100 and abs(loss_history[-2] - loss_history[-1]) < convergence_tol:
            break

    convergence_info = {
        # Guard the empty history (epochs <= 0) instead of raising
        # IndexError; mirrors standard_gradient_descent.
        "final_loss": loss_history[-1] if loss_history else float("inf"),
        "epochs_completed": len(loss_history),
        "learning_rate": learning_rate,
        "optimizer": "adam",
        "lambda_reg": lambda_reg,
    }

    return GradientDescentResult(
        weights=weights,
        bias=bias,
        loss_history=loss_history,
        convergence_info=convergence_info,
    )




# [docs]
def standard_gradient_descent(
    x: FloatArray,
    y: FloatArray,
    learning_rate: float = 0.01,
    epochs: int = 1000,
    convergence_tol: float = 1e-6,
    beta: float = 0.9,
    lambda_reg: float = 0.01,
) -> GradientDescentResult:
    """
    Train logistic-regression parameters with momentum gradient descent.

    Weights start as small normal draws (mean 0, std 0.01) from NumPy's
    global random state and the bias starts at 0. Every epoch records the
    loss for the current parameters, folds the fresh gradients into an
    exponential moving average and steps along that average.

    Args:
        x: Input feature matrix.
        y: Target labels.
        learning_rate: Step size for each update. Defaults to 0.01.
        epochs: Maximum number of epochs. Defaults to 1000.
        convergence_tol: Once the zero-based epoch index exceeds 100,
            training stops when two consecutive losses differ by less than
            this. Defaults to 1e-6.
        beta: Momentum coefficient of the moving average. Defaults to 0.9.
        lambda_reg: Regularization parameter. Defaults to 0.01.

    Returns:
        GradientDescentResult with the final parameters, the per-epoch
        losses and run metadata. ``final_loss`` is ``float("inf")`` when no
        epoch was run.

    """
    weights = np.random.normal(0, 0.01, x.shape[1])
    bias = 0.0

    # Moving averages of the gradients (the "velocity").
    velocity_w = np.zeros_like(weights)
    velocity_b = 0.0

    loss_history = []

    for step in range(epochs):
        probs = sigmoid(np.dot(x, weights) + bias)
        loss_history.append(binary_cross_entropy(y, probs, weights, lambda_reg))

        grad_w, grad_b = compute_gradients(x, y, probs, weights, lambda_reg)

        velocity_w = beta * velocity_w + (1 - beta) * grad_w
        velocity_b = beta * velocity_b + (1 - beta) * grad_b

        weights -= learning_rate * velocity_w
        bias -= learning_rate * velocity_b

        # No early stopping during the warm-up epochs.
        if step <= 100 or len(loss_history) <= 1:
            continue
        if abs(loss_history[-2] - loss_history[-1]) < convergence_tol:
            break

    final_loss = loss_history[-1] if loss_history else float("inf")

    return GradientDescentResult(
        weights=weights,
        bias=bias,
        loss_history=loss_history,
        convergence_info={
            "final_loss": final_loss,
            "epochs_completed": len(loss_history),
            "learning_rate": learning_rate,
            "with_momentum": True,
            "lambda_reg": lambda_reg,
        },
    )




# [docs]
def predict_proba(x: FloatArray, weights: FloatArray, bias: float) -> FloatArray:
    """
    Compute positive-class probabilities for the given samples.

    The linear score ``x . weights + bias`` is passed through ``sigmoid``.

    Args:
        x: Input feature matrix.
        weights: Model weights.
        bias: Model bias.

    Returns:
        Sigmoid of the linear scores.

    """
    return sigmoid(np.dot(x, weights) + bias)




# [docs]
def predict(
    x: FloatArray, weights: FloatArray, bias: float, threshold: float | None = None
) -> FloatArray:
    """
    Turn predicted probabilities into hard 0/1 labels.

    Args:
        x: Input feature matrix.
        weights: Model weights.
        bias: Model bias.
        threshold: Probability at or above which a sample is labelled 1.
            ``None`` selects 0.5.

    Returns:
        Integer array of binary predictions (0 or 1).

    """
    scores = predict_proba(x, weights, bias)
    cutoff = 0.5 if threshold is None else threshold
    return (scores >= cutoff).astype(int)