
Commit 3c5e410

Add Gaussian Naive Bayes classifier
1 parent 2c15b8c commit 3c5e410

File tree

1 file changed: +110 −0 lines changed


machine_learning/naive_bayes.py

Lines changed: 110 additions & 0 deletions
"""
Naive Bayes Classifier implementation.

This module implements Gaussian Naive Bayes from scratch without using
external machine learning libraries.

References:
https://en.wikipedia.org/wiki/Naive_Bayes_classifier
"""

from typing import List, Dict
import math


def gaussian_probability(x: float, mean: float, variance: float) -> float:
    """
    Calculate Gaussian probability density.

    >>> round(gaussian_probability(1.0, 1.0, 1.0), 3)
    0.399
    >>> gaussian_probability(1.0, 1.0, 0.0)
    0.0
    """
    if variance == 0:
        return 0.0

    exponent = math.exp(-((x - mean) ** 2) / (2 * variance))
    return (1 / math.sqrt(2 * math.pi * variance)) * exponent

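# The density computed above is the univariate Gaussian
#     p(x) = exp(-(x - mean) ** 2 / (2 * variance)) / sqrt(2 * pi * variance);
# returning 0.0 for a zero variance avoids a division by zero for constant
# features, and predict() skips the log of any zero probability.
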
class GaussianNaiveBayes:
    """
    Gaussian Naive Bayes classifier.
    """

    def __init__(self) -> None:
        self.class_priors: Dict[int, float] = {}
        self.means: Dict[int, List[float]] = {}
        self.variances: Dict[int, List[float]] = {}

    def fit(self, features: List[List[float]], labels: List[int]) -> None:
        """
        Train the Gaussian Naive Bayes classifier.

        :param features: Feature matrix
        :param labels: Class labels
        :raises ValueError: If input sizes mismatch

        >>> model = GaussianNaiveBayes()
        >>> model.fit([[1.0], [2.0], [3.0]], [0, 0, 1])
        """
        if len(features) != len(labels):
            raise ValueError("Features and labels must have the same length")

        # Group feature vectors by their class label.
        separated: Dict[int, List[List[float]]] = {}
        for feature_vector, label in zip(features, labels):
            separated.setdefault(label, []).append(feature_vector)

        total_samples = len(labels)

        for label, rows in separated.items():
            # Class prior: fraction of training samples with this label.
            self.class_priors[label] = len(rows) / total_samples

            transposed = list(zip(*rows))
            self.means[label] = [sum(col) / len(col) for col in transposed]

            # Population (maximum-likelihood) variance of each feature.
            self.variances[label] = [
                sum((x - mean) ** 2 for x in col) / len(col)
                for col, mean in zip(transposed, self.means[label])
            ]

    def predict(self, features: List[List[float]]) -> List[int]:
        """
        Predict class labels for input features.

        :param features: Feature matrix
        :return: Predicted labels

        >>> model = GaussianNaiveBayes()
        >>> X = [[1.0], [2.0], [3.0], [4.0]]
        >>> y = [0, 0, 1, 1]
        >>> model.fit(X, y)
        >>> model.predict([[1.5], [3.5]])
        [0, 1]
        """
        predictions: List[int] = []

        for row in features:
            class_scores: Dict[int, float] = {}

            for label in self.class_priors:
                # Work in log space: log prior plus the sum of log likelihoods.
                score = math.log(self.class_priors[label])

                for index, value in enumerate(row):
                    mean = self.means[label][index]
                    variance = self.variances[label][index]
                    probability = gaussian_probability(value, mean, variance)

                    # Skip zero probabilities so math.log never receives 0.
                    if probability > 0:
                        score += math.log(probability)

                class_scores[label] = score

            # Choose the class with the highest posterior score.
            predicted_label = max(
                class_scores.items(),
                key=lambda item: item[1],
            )[0]
            predictions.append(predicted_label)

        return predictions
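
For reference, here is a minimal usage sketch of the new class on a small hypothetical dataset (the import path assumes the repository layout above; the data values are illustrative only):

    from machine_learning.naive_bayes import GaussianNaiveBayes

    # Hypothetical toy data: two features per sample, two well-separated classes.
    features = [
        [1.0, 20.0],
        [1.2, 22.0],
        [3.5, 40.0],
        [3.8, 42.0],
    ]
    labels = [0, 0, 1, 1]

    model = GaussianNaiveBayes()
    model.fit(features, labels)

    # Points near each cluster should be assigned that cluster's label.
    print(model.predict([[1.1, 21.0], [3.6, 41.0]]))  # expected output: [0, 1]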
