11class Stats
2+ class << self
3+ # Welch's t-test (two-tailed). Returns the p-value, or nil if
4+ # either sample is too small to compute a meaningful test.
5+ def welch_p_value ( a , b )
6+ return nil if a . size < 2 || b . size < 2
7+
8+ stats_a = new ( a )
9+ stats_b = new ( b )
10+
11+ n_a = a . size . to_f
12+ n_b = b . size . to_f
13+ var_a = stats_a . sample_variance
14+ var_b = stats_b . sample_variance
15+
16+ se_sq = var_a / n_a + var_b / n_b
17+ if se_sq == 0.0
18+ # Both samples have zero variance — if means match they're
19+ # indistinguishable, otherwise they're trivially different.
20+ return stats_a . mean == stats_b . mean ? 1.0 : 0.0
21+ end
22+
23+ t = ( stats_a . mean - stats_b . mean ) / Math . sqrt ( se_sq )
24+
25+ # Welch-Satterthwaite degrees of freedom
26+ df = se_sq ** 2 / ( ( var_a / n_a ) ** 2 / ( n_a - 1 ) + ( var_b / n_b ) ** 2 / ( n_b - 1 ) )
27+
28+ # Two-tailed p-value: I_x(df/2, 1/2) where x = df/(df + t^2)
29+ x = df / ( df + t * t )
30+ regularized_incomplete_beta ( x , df / 2.0 , 0.5 )
31+ end
32+
33+ private
34+
35+ # Regularized incomplete beta function I_x(alpha, beta) via continued fraction (Lentz's method).
36+ # Returns the probability that a Beta(alpha, beta)-distributed variable is <= x.
37+ def regularized_incomplete_beta ( x , alpha , beta )
38+ return 0.0 if x <= 0.0
39+ return 1.0 if x >= 1.0
40+
41+ # Symmetry relation: pick the side that converges faster
42+ if x > ( alpha + 1.0 ) / ( alpha + beta + 2.0 )
43+ return 1.0 - regularized_incomplete_beta ( 1.0 - x , beta , alpha )
44+ end
45+
46+ # B(alpha, beta) * x^alpha * (1-x)^beta — computed in log-space to avoid overflow
47+ ln_normalizer = Math . lgamma ( alpha + beta ) [ 0 ] - Math . lgamma ( alpha ) [ 0 ] - Math . lgamma ( beta ) [ 0 ] +
48+ alpha * Math . log ( x ) + beta * Math . log ( 1.0 - x )
49+ normalizer = Math . exp ( ln_normalizer )
50+
51+ normalizer * beta_continued_fraction ( x , alpha , beta ) / alpha
52+ end
53+
54+ # Evaluates the continued fraction for I_x(alpha, beta) using Lentz's algorithm.
55+ # Each iteration computes two sub-steps (even and odd terms of the fraction).
56+ def beta_continued_fraction ( x , alpha , beta )
57+ floor = 1.0e-30 # prevent division by zero in Lentz's method
58+ converged = false
59+
60+ numerator_term = 1.0
61+ denominator_term = 1.0 - ( alpha + beta ) * x / ( alpha + 1.0 )
62+ denominator_term = floor if denominator_term . abs < floor
63+ denominator_term = 1.0 / denominator_term
64+ fraction = denominator_term
65+
66+ ( 1 ..200 ) . each do |iteration |
67+ two_i = 2 * iteration
68+
69+ # Even sub-step: d_{2m} coefficient of the continued fraction
70+ coeff = iteration * ( beta - iteration ) * x / ( ( alpha + two_i - 1.0 ) * ( alpha + two_i ) )
71+ denominator_term = 1.0 + coeff * denominator_term
72+ denominator_term = floor if denominator_term . abs < floor
73+ numerator_term = 1.0 + coeff / numerator_term
74+ numerator_term = floor if numerator_term . abs < floor
75+ denominator_term = 1.0 / denominator_term
76+ fraction *= denominator_term * numerator_term
77+
78+ # Odd sub-step: d_{2m+1} coefficient of the continued fraction
79+ coeff = -( alpha + iteration ) * ( alpha + beta + iteration ) * x / ( ( alpha + two_i ) * ( alpha + two_i + 1.0 ) )
80+ denominator_term = 1.0 + coeff * denominator_term
81+ denominator_term = floor if denominator_term . abs < floor
82+ numerator_term = 1.0 + coeff / numerator_term
83+ numerator_term = floor if numerator_term . abs < floor
84+ denominator_term = 1.0 / denominator_term
85+ correction = denominator_term * numerator_term
86+ fraction *= correction
87+
88+ if ( correction - 1.0 ) . abs < 1.0e-10
89+ converged = true
90+ break
91+ end
92+ end
93+
94+ warn "Stats.beta_continued_fraction: did not converge (alpha=#{ alpha } , beta=#{ beta } , x=#{ x } )" unless converged
95+ fraction
96+ end
97+ end
98+
299 def initialize ( data )
3100 @data = data
4101 end
@@ -15,13 +112,20 @@ def mean
15112 @data . sum ( 0.0 ) / @data . size
16113 end
17114
115+ # Population standard deviation (N denominator) — describes these specific values.
18116 def stddev
19117 mean = self . mean
20118 diffs_squared = @data . map { |v | ( v -mean ) * ( v -mean ) }
21119 mean_squared = diffs_squared . sum ( 0.0 ) / @data . size
22120 Math . sqrt ( mean_squared )
23121 end
24122
123+ # Unbiased sample variance (N-1 denominator, Bessel's correction) — for inference.
124+ def sample_variance
125+ m = mean
126+ @data . sum { |v | ( v - m ) ** 2 } / ( @data . size - 1 ) . to_f
127+ end
128+
25129 def median
26130 compute_median ( @data )
27131 end
0 commit comments