From d850464a93ee185bacac6f9d60297fd2dd792bab Mon Sep 17 00:00:00 2001 From: nataziel Date: Sat, 31 May 2025 10:25:09 +1000 Subject: [PATCH 1/6] add specialised inverse cdf implementation for geometric distribution --- src/distribution/geometric.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/distribution/geometric.rs b/src/distribution/geometric.rs index e794fdf9..a9e8dc99 100644 --- a/src/distribution/geometric.rs +++ b/src/distribution/geometric.rs @@ -2,6 +2,7 @@ use crate::distribution::{Discrete, DiscreteCDF}; use crate::prec; use crate::statistics::*; use core::f64; +use num_traits::{One, Zero}; /// Implements the /// [Geometric](https://en.wikipedia.org/wiki/Geometric_distribution) @@ -153,6 +154,33 @@ impl DiscreteCDF for Geometric { ((-self.p).ln_1p() * (x as f64)).exp() } } + + /// Calculates the inverse cumulative distribution function for the + /// geometric distribution at `x`. + /// In other languages, such as R, this is known as the the quantile function. + /// + /// # Formula + /// + /// ```text + /// p = ceil(log(1-x)/log(1-p)) + /// ``` + /// + /// # Panics + /// panics if provided `x` not on interval [0.0, 1.0] + fn inverse_cdf(&self, x: f64) -> u64 { + // ceil(log(1-x)/log(1-self.p)) = ceil(log1p(-x)/log1p(-self.p)) + // = ceil((-x).ln_1p()/(-self.p).ln_1p()) + // = ((-x).ln_1p()/(-self.p).ln_1p()).ceil() + if x <= self.cdf(self.min()) { + return self.min(); + } else if x == ::one() { + return self.max(); + } else if !(::zero()..=::one()).contains(&x) { + std::panic!("p must be on [0, 1]") + } + + ((-x).ln_1p() / (-self.p).ln_1p()).ceil() as u64 + } } impl Min for Geometric { From a6abcc1309b0d48258da5a7aef200320a8893a92 Mon Sep 17 00:00:00 2001 From: Goose Date: Tue, 5 Aug 2025 11:48:30 +1000 Subject: [PATCH 2/6] add tests for coverage --- src/distribution/geometric.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/distribution/geometric.rs b/src/distribution/geometric.rs index a9e8dc99..55dcc6c6 100644 --- a/src/distribution/geometric.rs +++ b/src/distribution/geometric.rs @@ -548,5 +548,14 @@ mod tests { test_exact(1., 1, invcdf(1.)); test_exact(0.2, 1, invcdf(0.2)); test_exact(0.004, 173, invcdf(0.5)); + test_exact(0.5, u64::MAX, invcdf(1.)); + test_exact(0.5, 2, invcdf(0.75)); + } + + #[test] + #[should_panic] + fn test_inverse_cdf_panic() { + let invcdf = |arg: f64| move |x: Geometric| x.inverse_cdf(arg); + test_exact(1., 1, invcdf(2.)); } } From 81c4a4815687fb081e555d741a81dc0457bb7b1d Mon Sep 17 00:00:00 2001 From: Goose Date: Tue, 5 Aug 2025 11:48:39 +1000 Subject: [PATCH 3/6] add comments --- src/distribution/geometric.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/distribution/geometric.rs b/src/distribution/geometric.rs index 55dcc6c6..26457ca6 100644 --- a/src/distribution/geometric.rs +++ b/src/distribution/geometric.rs @@ -172,8 +172,11 @@ impl DiscreteCDF for Geometric { // = ceil((-x).ln_1p()/(-self.p).ln_1p()) // = ((-x).ln_1p()/(-self.p).ln_1p()).ceil() if x <= self.cdf(self.min()) { + // if p = 1 this branch will always be taken + // no matter the value of x return self.min(); } else if x == ::one() { + // note that if p = 1 & x = 1 the above branch is taken return self.max(); } else if !(::zero()..=::one()).contains(&x) { std::panic!("p must be on [0, 1]") From 7c0c2eda97454d66a5475a433e552480d7e8603d Mon Sep 17 00:00:00 2001 From: Goose Date: Mon, 3 Nov 2025 10:59:04 +1000 Subject: [PATCH 4/6] fix kde formatting --- src/function/kernel.rs | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/function/kernel.rs b/src/function/kernel.rs index 4046a831..7fbc7dae 100644 --- a/src/function/kernel.rs +++ b/src/function/kernel.rs @@ -66,11 +66,7 @@ pub struct Epanechnikov; impl Kernel for Epanechnikov { fn evaluate(&self, x: f64) -> f64 { let a = x.abs(); - if a <= 1.0 { - 0.75 * (1.0 - a * a) - } else { - 0.0 - } + if a <= 1.0 { 0.75 * (1.0 - a * a) } else { 0.0 } } fn support(&self) -> Option<(f64, f64)> { @@ -85,11 +81,7 @@ pub struct Triangular; impl Kernel for Triangular { fn evaluate(&self, x: f64) -> f64 { let a = x.abs(); - if a <= 1.0 { - 1.0 - a - } else { - 0.0 - } + if a <= 1.0 { 1.0 - a } else { 0.0 } } fn support(&self) -> Option<(f64, f64)> { @@ -143,11 +135,7 @@ pub struct Uniform; impl Kernel for Uniform { fn evaluate(&self, x: f64) -> f64 { - if x.abs() <= 1.0 { - 0.5 - } else { - 0.0 - } + if x.abs() <= 1.0 { 0.5 } else { 0.0 } } fn support(&self) -> Option<(f64, f64)> { From 97128285adc40b33fec1fe5aa3aea6e7a350173d Mon Sep 17 00:00:00 2001 From: Goose Date: Mon, 3 Nov 2025 11:03:16 +1000 Subject: [PATCH 5/6] use core instead of std constants --- src/function/kernel.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/function/kernel.rs b/src/function/kernel.rs index 7fbc7dae..daf9b3d0 100644 --- a/src/function/kernel.rs +++ b/src/function/kernel.rs @@ -28,7 +28,7 @@ //! assert!((e.evaluate(0.0) - 0.75).abs() < 1e-12); //! ``` -use std::f64::consts::{FRAC_PI_2, PI}; +use core::f64::consts::{FRAC_PI_2, PI}; /// Common interface for kernel functions used in KDE and smoothing. pub trait Kernel { From 441e768ed58787310f3546779afb50a2ee6c8e65 Mon Sep 17 00:00:00 2001 From: Goose Date: Mon, 3 Nov 2025 11:07:55 +1000 Subject: [PATCH 6/6] use core panic instead of std --- src/distribution/geometric.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/distribution/geometric.rs b/src/distribution/geometric.rs index 26457ca6..54aebdbc 100644 --- a/src/distribution/geometric.rs +++ b/src/distribution/geometric.rs @@ -175,11 +175,12 @@ impl DiscreteCDF for Geometric { // if p = 1 this branch will always be taken // no matter the value of x return self.min(); - } else if x == ::one() { - // note that if p = 1 & x = 1 the above branch is taken + } + if x == ::one() { return self.max(); - } else if !(::zero()..=::one()).contains(&x) { - std::panic!("p must be on [0, 1]") + } + if !(::zero()..=::one()).contains(&x) { + core::panic!("p must be on [0, 1]") } ((-x).ln_1p() / (-self.p).ln_1p()).ceil() as u64