"""Ridge Regression (L2 regularization) implemented with batch gradient descent.

This module provides a small, well-tested `RidgeRegression` class that is
compatible with the existing `linear_regression` demo dataset (ADR vs Rating).

Features:
- Bias (intercept) handled automatically unless the caller provides an
  already-augmented feature matrix.
- L2 regularization that excludes the bias term.
- `mean_absolute_error` utility and a small `main()` demo that fetches the
  CSGO ADR vs Rating CSV used elsewhere in the repository.

Examples
--------
>>> import numpy as np
>>> X = np.array([[1.0], [2.0], [3.0]])
>>> y = np.array([2.0, 4.0, 6.0])
>>> model = RidgeRegression(learning_rate=0.1, lambda_=0.0, epochs=2000)
>>> model.fit(X, y)
>>> np.allclose(model.weights, [0.0, 2.0], atol=1e-2)
True
>>> model.predict(np.array([[4.0], [5.0]]))
array([ 8., 10.])
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Optional

import httpx
import numpy as np


@dataclass
class RidgeRegression:
    """Ridge Regression using batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size for gradient descent (must be > 0).
    lambda_ : float
        L2 regularization strength (must be >= 0). Regularization is NOT
        applied to the bias (intercept) term.
    epochs : int
        Number of gradient descent iterations (must be > 0).
    """

    learning_rate: float = 0.01
    lambda_: float = 0.1
    epochs: int = 1000
    weights: Optional[np.ndarray] = None

    def __post_init__(self) -> None:
        if self.learning_rate <= 0:
            raise ValueError("learning_rate must be positive")
        if self.lambda_ < 0:
            raise ValueError("lambda_ must be non-negative")
        if self.epochs <= 0:
            raise ValueError("epochs must be positive")

    @staticmethod
    def _add_intercept(features: np.ndarray) -> np.ndarray:
        if features.ndim != 2:
            raise ValueError("features must be a 2D array")
        n_samples = features.shape[0]
        return np.c_[np.ones(n_samples), features]

    def fit(self, features: np.ndarray, target: np.ndarray, add_intercept: bool = True) -> None:
        """Train the ridge regression model.

        Parameters
        ----------
        features : np.ndarray
            2D array (n_samples, n_features).
        target : np.ndarray
            1D array (n_samples,).
        add_intercept : bool
            If True, a bias column of ones is prepended to `features`.
        """
        if features.ndim != 2:
            raise ValueError("features must be a 2D array")
        if target.ndim != 1:
            raise ValueError("target must be a 1D array")
        if features.shape[0] != target.shape[0]:
            raise ValueError("Number of samples must match")

        X = self._add_intercept(features) if add_intercept else features
        n_samples, n_features = X.shape

        # initialize weights (including the bias as weights[0])
        self.weights = np.zeros(n_features)
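
        # Objective minimized by the loop below (the bias weight w[0] is
        # excluded from the penalty):
        #   J(w) = (1 / (2 * n_samples)) * ||X w - y||^2 + lambda_ * ||w[1:]||^2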
        for _ in range(self.epochs):
            preds = X @ self.weights
            errors = preds - target

            # gradient without regularization
            grad = (X.T @ errors) / n_samples

            # add L2 regularization term (do not regularize the bias term)
            reg = np.concatenate(([0.0], 2 * self.lambda_ * self.weights[1:]))
            grad += reg

            self.weights -= self.learning_rate * grad

    def predict(self, features: np.ndarray, add_intercept: bool = True) -> np.ndarray:
        """Predict target values for `features`.

        Parameters
        ----------
        features : np.ndarray
            2D array (n_samples, n_features).
        add_intercept : bool
            If True, a bias column is added to `features` before prediction.
        """
        if self.weights is None:
            raise ValueError("Model is not trained")
        X = self._add_intercept(features) if add_intercept else features
        return X @ self.weights


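# A minimal cross-check sketch, illustrative only: `_closed_form_ridge` is a
# hypothetical helper (not part of the class above) that solves the same
# penalized objective in closed form, (X^T X / n + 2 * lambda * D) w = X^T y / n,
# where D is the identity with a zero in the bias position. After enough
# epochs, `RidgeRegression.fit` should agree with it closely.
def _closed_form_ridge(features: np.ndarray, target: np.ndarray, lambda_: float) -> np.ndarray:
    X = np.c_[np.ones(features.shape[0]), features]  # same bias column as the class
    n_samples, n_features = X.shape
    d = np.eye(n_features)
    d[0, 0] = 0.0  # exclude the bias weight from the penalty
    return np.linalg.solve(X.T @ X / n_samples + 2 * lambda_ * d, X.T @ target / n_samples)

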
def mean_absolute_error(predicted: np.ndarray, actual: np.ndarray) -> float:
    """Return the mean absolute error between two arrays of the same shape.
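
    Examples
    --------
    >>> mean_absolute_error(np.array([1.0, 2.0]), np.array([1.0, 4.0]))
    1.0
    """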
    predicted = np.asarray(predicted)
    actual = np.asarray(actual)
    if predicted.shape != actual.shape:
        raise ValueError("predicted and actual must have the same shape")
    return float(np.mean(np.abs(predicted - actual)))


def collect_dataset() -> np.ndarray:
    """Fetch the ADR vs Rating CSV used in the repo's linear regression demo."""
    response = httpx.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
        "master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    response.raise_for_status()
    lines = response.text.splitlines()
    data = [line.split(",") for line in lines]
    data.pop(0)  # drop the CSV header row
    return np.array(data)


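# Offline alternative (a sketch under an assumption: a local copy of the same
# CSV, with the same header row and column order, saved at a hypothetical path
# "ADRvsRating.csv" next to this script):
#
#     with open("ADRvsRating.csv") as f:
#         rows = [line.split(",") for line in f.read().splitlines()]
#     data = np.array(rows[1:])  # rows[0] is the header

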
def main() -> None:
    data = collect_dataset()

    # features and target (same column layout as linear_regression.py)
    X = np.c_[data[:, 0].astype(float)]
    y = data[:, 1].astype(float)

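    # Hyperparameter note (an assumption, not a tuned result): ADR is left
    # unscaled here, so a small learning rate and many epochs keep gradient
    # descent stable; standardizing X first would allow a much larger rate.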
    model = RidgeRegression(learning_rate=0.0002, lambda_=0.01, epochs=50000)
    model.fit(X, y)

    preds = model.predict(X)
    mae = mean_absolute_error(preds, y)

    print("Learned weights:")
    for i, w in enumerate(model.weights):
        print(f"w[{i}] = {w:.6f}")
    print(f"MAE on training data: {mae:.6f}")


if __name__ == "__main__":
    import doctest

    doctest.testmod()
    main()