From 7c330c93f21af931a96df2921608576602d50096 Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Sun, 25 Dec 2022 23:40:04 +0100 Subject: [PATCH 01/12] feat: trait distribution::ContinuousMultivariateCDF --- src/distribution/mod.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/distribution/mod.rs b/src/distribution/mod.rs index 23ae7ec5..d7daa863 100644 --- a/src/distribution/mod.rs +++ b/src/distribution/mod.rs @@ -3,6 +3,7 @@ //! concrete implementations for a variety of distributions. use super::statistics::{Max, Min}; use ::num_traits::{float::Float, Bounded, Num}; +use ::nalgebra::DVector; pub use self::bernoulli::Bernoulli; pub use self::beta::Beta; @@ -274,3 +275,36 @@ pub trait Discrete { /// ``` fn ln_pmf(&self, x: K) -> T; } + +/// The `ContinuousMultivariateCDF` trait is used to specify and interface +/// for univariate distributions for which cdf DVector float arguments are +/// sensible +pub trait ContinuousMultivariateCDF { + /// Returns the cumulative distribution function calculated + /// at `x` for a given multivariate distribution. May panic depending + /// on the implementor. + /// + /// # Examples + /// + /// ``` + /// use statrs::distribution::{ContinuousMultivariateCDF, MultivariateNormal}; + /// + /// let mvn = MultivariateNormal::new(vec![0., 0.], vec![1., 0., 0., 1.]).unwrap(); + /// assert_eq!(0.5, mvn.cdf([0., 0.,])); + /// ``` + fn cdf(&self, x: DVector) -> T; + + /// Returns the survival function calculated + /// at `x` for a given distribution. May panic depending + /// on the implementor. + /// + /// # Examples + /// + /// ``` + /// use statrs::distribution::{ContinuousMultivariateCDF, MultivariateNormal}; + /// + /// let mvs = MultivariateNormal::new(vec![0., 0.], vec![1., 0., 0., 1.]).unwrap(); + /// assert_eq!(f64::NEG_INFINITY, mvs.sf([f64::INFINITY, f64::INFINITY])); + /// ``` + fn sf(&self, x: DVector) -> T; +} From 1599f4809511ad80d3d662eb73e6e37b4706ef19 Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Thu, 29 Dec 2022 01:49:16 +0100 Subject: [PATCH 02/12] feat: dynamic cholesky decomposition for multivariate normal cdf --- Cargo.toml | 1 + src/distribution/mod.rs | 2 +- src/distribution/multivariate_normal.rs | 107 +++++++++++++++++++++++- 3 files changed, 106 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d8e6eadf..031b69f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ nalgebra = { version = "0.29", features = ["rand"] } approx = "0.5.0" num-traits = "0.2.14" lazy_static = "1.4.0" +primes = "0.3.0" [dev-dependencies] criterion = "0.3.3" diff --git a/src/distribution/mod.rs b/src/distribution/mod.rs index d7daa863..dcc9f6e5 100644 --- a/src/distribution/mod.rs +++ b/src/distribution/mod.rs @@ -293,7 +293,7 @@ pub trait ContinuousMultivariateCDF { /// assert_eq!(0.5, mvn.cdf([0., 0.,])); /// ``` fn cdf(&self, x: DVector) -> T; - + /// Returns the survival function calculated /// at `x` for a given distribution. May panic depending /// on the implementor. diff --git a/src/distribution/multivariate_normal.rs b/src/distribution/multivariate_normal.rs index ab168020..b8d38b7b 100644 --- a/src/distribution/multivariate_normal.rs +++ b/src/distribution/multivariate_normal.rs @@ -1,5 +1,6 @@ -use crate::distribution::Continuous; -use crate::distribution::Normal; +use crate::distribution::{ + Continuous, ContinuousCDF, ContinuousMultivariateCDF, Normal +}; use crate::statistics::{Max, MeanN, Min, Mode, VarianceN}; use crate::{Result, StatsError}; use nalgebra::{ @@ -8,6 +9,7 @@ use nalgebra::{ }; use nalgebra::{DMatrix, DVector}; use rand::Rng; +use primes::{Sieve, PrimeSet}; use std::f64; use std::f64::consts::{E, PI}; @@ -29,7 +31,7 @@ use std::f64::consts::{E, PI}; #[derive(Debug, Clone, PartialEq)] pub struct MultivariateNormal { dim: usize, - cov_chol_decomp: DMatrix, + pub cov_chol_decomp: DMatrix, mu: DVector, cov: DMatrix, precision: DMatrix, @@ -98,6 +100,71 @@ impl MultivariateNormal { .ln(), ) } + + /// Returns the lower triangular cholesky decomposition of + /// self.cov wrt. switching of rows and columns of the matrix as well as mutating + /// the input `a` and `b` with the row switches + /// + /// Algorithm explained in 4.1.3 in ´Computation of Multivariate + /// Normal and t Probabilities´, Alan Genz. + pub fn chol_chrows(&self, a: &mut DVector, b: &mut DVector) -> DMatrix { + let mut cov = self.cov.clone(); + let mut chol_lower: DMatrix = DMatrix::zeros(self.dim, self.dim); + let mut y: DVector = DVector::zeros(self.dim); + + let std_normal = Normal::new(0., 1.).unwrap(); + for i in 0..self.dim { + let mut cdf_diff = f64::INFINITY; + let mut new_i = i; + let mut a_tilde = a[i]; + let mut b_tilde = b[i]; + // Find the index of which to switch rows with + for j in i..self.dim { + let mut num = 0.; + let mut den = 0.; + if i > 0 { + // Numerator: + // -Σ lᵢₘyₘ, sum from m=1 to i-1 + num = (chol_lower.index((j,..i)) * y.index((..i,0)))[0]; + // Denominator: + // √(σᵢᵢ - Σ lᵢₘ²), sum from m=1 to i-1 + den = (cov[(j,j)] - (chol_lower.index((j,..i)).transpose() * chol_lower.index((j,..i)))[0]).sqrt(); + } + let pot_a_tilde = (a[j] - num) / den; + let pot_b_tilde = (b[j] - num) / den; + let cdf_a = std_normal.cdf(pot_a_tilde); + let cdf_b = std_normal.cdf(pot_b_tilde); + + let pot_cdf_diff = cdf_b - cdf_a; // Potential minimum + if pot_cdf_diff < cdf_diff { + new_i = j; + cdf_diff = pot_cdf_diff; + a_tilde = pot_a_tilde; + b_tilde = pot_b_tilde; + } + } + if i != new_i { + cov.swap_rows(i,new_i); + cov.swap_columns(i,new_i); + a.swap_rows(i,new_i); + b.swap_rows(i,new_i); + chol_lower.swap_rows(i,new_i); + chol_lower.swap_columns(i,new_i); + } + + // Get the expected values: + // yᵢ = 1 / (Ψ(bᵢ) - Ψ(𝑎ᵢ)) * ∫_𝑎ᵢ^𝑏ᵢ sψ(s) ds + y[i] = ((-a_tilde.powi(2)/2.).exp() - (-b_tilde.powi(2)/2.).exp()) / (cdf_diff); + + // Cholesky decomposition algorithm with the new changed row + let mut ids = chol_lower.index_mut((..,..i+1)); // Get only the relevant indices + ids[(i,i)] = (cov[(i,i)] - (ids.index((i,..i)) * ids.index((i,..i)).transpose())[0]).sqrt(); + for j in i+1..self.dim { + ids[(j,i)] = (cov[(j,i)] - (ids.index((i,..i)) * ids.index((j,..i)).transpose())[0]) / ids[(i,i)]; + } + } + chol_lower + } } impl ::rand::distributions::Distribution> for MultivariateNormal { @@ -117,6 +184,40 @@ impl ::rand::distributions::Distribution> for MultivariateNormal { } } +impl ContinuousMultivariateCDF for MultivariateNormal { + /// Returns the cumulative distribution function at `x` for the + /// multivariate normal distribution, using approximation with + /// `N` points. Uses the algorithm as explained in + /// 'Computation of Multivariate Normal and t Probabilites', Section 4.2.2, + /// by Alan Genz. + fn cdf(&self, mut x: DVector) -> f64 { + // Shift integration limit wrt. mean + x -= &self.mu; + + let chol_lower = self.chol_chrows(&mut x, &mut DVector::zeros(self.dim)); + + // Generate first `dim` primes, Ricthmyer generators + // Could write function in crate for this instead if we + // want less imports. Efficiency in this part of the code does not + // matter much + let mut sqrt_primes = vec![0.; self.dim]; + let mut pset = Sieve::new(); + for (i, n) in pset.iter().enumerate().take(self.dim) { + sqrt_primes[i] = (n as f64).sqrt(); + } + + // let n_samples = 1000 * self.dim; + return 0. + } + + /// Returns the survival function at `x` for the + /// multivariate normal distribution, using approximation with + /// `N` points. + fn sf(&self, x: DVector) -> f64 { + 1. - self.cdf(x) + } +} + impl Min> for MultivariateNormal { /// Returns the minimum value in the domain of the /// multivariate normal distribution represented by a real vector From 8baa0dbb40b19a4dce5e92240571e3b1c2052892 Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Thu, 29 Dec 2022 13:22:21 +0100 Subject: [PATCH 03/12] feat: Multivariate uniform distribution --- src/distribution/mod.rs | 2 + src/distribution/multivariate_uniform.rs | 489 +++++++++++++++++++++++ 2 files changed, 491 insertions(+) create mode 100644 src/distribution/multivariate_uniform.rs diff --git a/src/distribution/mod.rs b/src/distribution/mod.rs index dcc9f6e5..94d85777 100644 --- a/src/distribution/mod.rs +++ b/src/distribution/mod.rs @@ -27,6 +27,7 @@ pub use self::laplace::Laplace; pub use self::log_normal::LogNormal; pub use self::multinomial::Multinomial; pub use self::multivariate_normal::MultivariateNormal; +pub use self::multivariate_uniform::MultivariateUniform; pub use self::negative_binomial::NegativeBinomial; pub use self::normal::Normal; pub use self::pareto::Pareto; @@ -60,6 +61,7 @@ mod laplace; mod log_normal; mod multinomial; mod multivariate_normal; +mod multivariate_uniform; mod negative_binomial; mod normal; mod pareto; diff --git a/src/distribution/multivariate_uniform.rs b/src/distribution/multivariate_uniform.rs new file mode 100644 index 00000000..0b2a148c --- /dev/null +++ b/src/distribution/multivariate_uniform.rs @@ -0,0 +1,489 @@ +use crate::distribution::{Continuous, ContinuousMultivariateCDF}; +use crate::statistics::*; +use crate::{Result, StatsError}; +use nalgebra::DVector; +use rand::distributions::Uniform as RandUniform; +use rand::Rng; +use std::f64; + +/// Implements a Continuous Multivariate Uniform distribution +/// +/// # Examples +/// +/// ``` +/// use statrs::distribution::{MultivariateUniform, Continuous}; +/// use statrs::statistics::Distribution; +/// +/// let n = MultivariateUniform::new(vec![-1., 0.], vec![0., 1.]).unwrap(); +// /// assert_eq!(n.mean().unwrap(), 0.5); +// /// assert_eq!(n.pdf(0.5), 1.0); +/// ``` +#[derive(Debug, Clone, PartialEq)] +pub struct MultivariateUniform { + dim: usize, + min: DVector, + max: DVector, + sample_limits: Option>>, +} + +impl MultivariateUniform { + /// Constructs a new uniform distribution with a min in each dimension + /// of `min` and a max in each dimension of `max` + /// + /// # Errors + /// + /// Returns an error if any elements of `min` or `max` are `NaN` + /// + /// # Examples + /// + /// ``` + /// use statrs::distribution::MultivariateUniform; + /// use std::f64; + /// + /// let mut result = Uniform::new(vec![-1., 0.], vec![0., 1.,]); + /// assert!(result.is_ok()); + /// + /// result = Uniform::new(f64::NAN, f64::NAN); + /// result = Uniform::new(vec![0., f64::NAN], vec![f64::NAN, 1.]); + /// assert!(result.is_err()); + /// ``` + pub fn new(min: Vec, max: Vec) -> Result { + let min = DVector::from_vec(min); + let max = DVector::from_vec(max); + if min.iter().any(|f| f.is_nan()) + || max.iter().any(|f| f.is_nan()) + || min.len() != max.len() + || min.iter().zip(max.iter()).any(|(f,g)| f > g) + { + Err(StatsError::BadParams) + } else { + let dim = min.len(); + let mut sample_limits_unchecked: Vec>> = vec![None; min.len()]; + // If we have infinite values as min or max we can't sample + let sample_limits: Option>>; + if min.iter().any(|f| f == &f64::NEG_INFINITY) + || max.iter().any(|f| f == &f64::INFINITY) + { + sample_limits = None; + } else { + for i in 0..dim { + sample_limits_unchecked[i] = Some(RandUniform::new_inclusive(min[i],max[i])) + } + sample_limits = Some(DVector::from_vec( + sample_limits_unchecked.into_iter().flatten().collect::>>() + )); + } + Ok(MultivariateUniform { dim, min, max, sample_limits}) + } + } +} + +/// Returns an Option of sampled point in `self.dim` dimensions if the boundaries +/// are not positive or negative infinite, else return None +impl ::rand::distributions::Distribution>> for MultivariateUniform { + fn sample(&self, rng: &mut R) -> Option> { + match &self.sample_limits { + Some(sample_limits) => { + let mut samples: DVector = DVector::zeros(self.dim); + for i in 0..self.dim { + samples[i] = rng.sample(sample_limits[i]); + } + Some(samples) + }, + None => None + } + } +} + +impl ContinuousMultivariateCDF for MultivariateUniform { + /// Calculates the cumulative distribution function for the uniform + /// distribution + /// at `x` + /// + /// # Formula + /// + /// ```ignore + /// (x - min) / (max - min) + /// ``` + fn cdf(&self, x: DVector) -> f64 { + if x <= self.min { + 0. + } else if x >= self.max { + 1. + } else { + (x - &self.min).iter().product::() / self.min.iter().zip(self.max.iter()).map(|(f,g)| g - f).product::() + } + } + + /// Calculates the survival function for the uniform + /// distribution at `x` + /// + /// # Formula + /// + /// ```ignore + /// (max - x) / (max - min) + /// ``` + fn sf(&self, x: DVector) -> f64 { + if x <= self.min { + 1.0 + } else if x >= self.max { + 0.0 + } else { + (&self.max - x).iter().product::() / self.min.iter().zip(self.max.iter()).map(|(f,g)| g - f).product::() + } + } +} + +impl Min> for MultivariateUniform { + fn min(&self) -> DVector { + self.min.clone() + } +} + +impl Max> for MultivariateUniform { + fn max(&self) -> DVector { + self.max.clone() + } +} + +impl MeanN> for MultivariateUniform { + /// Returns the mean of the multivariate uniform distribution + fn mean(&self) -> Option> { + Some((&self.min + &self.max) / 2.) + } +} + +// impl VarianceN> for MultivariateUniform { +// /// Returns the covariance matrix of the multivariate uniform distribution +// fn variance(&self) -> Option> { +// Some(...) +// } +// } + +impl Median> for MultivariateUniform { + /// Returns the median for the continuous uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// (min + max) / 2 + /// ``` + fn median(&self) -> DVector { + (&self.min + &self.max) / 2.0 + } +} + +impl Mode>> for MultivariateUniform { + /// Returns the mode for the continuous uniform distribution + /// + /// # Remarks + /// + /// Since every element has an equal probability, mode simply + /// returns the middle element + /// + /// # Formula + /// + /// ```ignore + /// N/A // (max + min) / 2 for the middle element + /// ``` + fn mode(&self) -> Option> { + Some((&self.min + &self.max) / 2.0) + } +} + +impl<'a> Continuous<&'a DVector, f64> for MultivariateUniform { + /// Calculates the probability density function for the continuous uniform + /// distribution at `x` + /// + /// # Remarks + /// + /// Returns `0.0` if `x` is not in `[min, max]` + /// + /// # Formula + /// + /// ```ignore + /// 1 / ∏(max - min) + /// ``` + fn pdf(&self, x: &'a DVector) -> f64 { + if x < &self.min || x > &self.max { + 0.0 + } else { + 1. / self.min.iter().zip(self.max.iter()).map(|(f,g)| g - f).product::() + } + } + + /// Calculates the log probability density function for the continuous + /// uniform + /// distribution at `x` + /// + /// # Remarks + /// + /// Returns `f64::NEG_INFINITY` if `x` is not in `[min, max]` + /// + /// # Formula + /// + /// ```ignore + /// ln(1 / ∏(max - min)) = -ln(∑(max -min)) + /// ``` + fn ln_pdf(&self, x: &'a DVector) -> f64 { + if x < &self.min || x > &self.max { + f64::NEG_INFINITY + } else { + -self.min.iter().zip(self.max.iter()).map(|(f,g)| g - f).sum::().ln() + } + } +} + +#[rustfmt::skip] +// #[cfg(all(test, feature = "nightly"))] +mod tests { + use crate::statistics::*; + use crate::distribution::{ContinuousCDF, Continuous, MultivariateUniform}; + use crate::distribution::internal::*; + use crate::consts::ACC; + use nalgebra::DVector; + + fn try_create(min: Vec, max: Vec) -> MultivariateUniform { + let n = MultivariateUniform::new(min, max); + assert!(n.is_ok()); + n.unwrap() + } + + fn create_case(min: Vec, max: Vec) { + let n = try_create(min.clone(), max.clone()); + assert_eq!(n.min(), DVector::from_vec(min)); + assert_eq!(n.max(), DVector::from_vec(max)); + } + + fn bad_create_case(min: Vec, max: Vec) { + let n = MultivariateUniform::new(min, max); + assert!(n.is_err()); + } + + fn get_value(min: Vec, max: Vec, eval: F) -> DVector + where F: Fn(MultivariateUniform) -> DVector + { + let n = try_create(min, max); + eval(n) + } + + fn test_case(min: Vec, max: Vec, expected: DVector, eval: F) + where F: Fn(MultivariateUniform) -> DVector + { + let x = get_value(min, max, eval); + assert_eq!(expected, x); + } + + fn test_almost(min: Vec, max: Vec, expected: DVector, acc: f64, eval: F) + where F: Fn(MultivariateUniform) -> DVector + { + + let x = get_value(min, max, eval); + // assert_almost_eq!(expected, x, acc); + } + + macro_rules! dvec { + ($($x:expr),*) => (DVector::from_vec(vec![$($x),*])); + } + + #[test] + fn test_create() { + create_case(vec![0., 2.], vec![1., 3.,]); + create_case(vec![0.1, 0.], vec![0.2, 0.1,]); + create_case(vec![-5., 5., 10.], vec![-4., 6., 11.,]); + create_case(vec![0.], vec![1.]); + } + + #[test] + fn test_bad_create() { + bad_create_case(vec![f64::NAN, 5.], vec![0., 6.]); + bad_create_case(vec![1., 1.,], vec![0., 0.]); + bad_create_case(vec![0., 0.], vec![-1., 1.]); + bad_create_case(vec![0., 0.,], vec![f64::NAN, 3.]); + bad_create_case(vec![0.], vec![1., 2.]); + } + + // #[test] + // fn test_variance() { + // let variance = |x: MultivariateUniform| x.variance().unwrap(); + // test_case(-0.0, 2.0, 1.0 / 3.0, variance); + // test_case(0.0, 2.0, 1.0 / 3.0, variance); + // test_almost(0.1, 4.0, 1.2675, 1e-15, variance); + // test_case(10.0, 11.0, 1.0 / 12.0, variance); + // test_case(0.0, f64::INFINITY, f64::INFINITY, variance); + // } + + // #[test] + // fn test_entropy() { + // let entropy = |x: MultivariateUniform| x.entropy().unwrap(); + // test_case(-0.0, 2.0, 0.6931471805599453094172, entropy); + // test_case(0.0, 2.0, 0.6931471805599453094172, entropy); + // test_almost(0.1, 4.0, 1.360976553135600743431, 1e-15, entropy); + // test_case(1.0, 10.0, 2.19722457733621938279, entropy); + // test_case(10.0, 11.0, 0.0, entropy); + // test_case(0.0, f64::INFINITY, f64::INFINITY, entropy); + // } + + // #[test] + // fn test_skewness() { + // let skewness = |x: MultivariateUniform| x.skewness().unwrap(); + // test_case(-0.0, 2.0, 0.0, skewness); + // test_case(0.0, 2.0, 0.0, skewness); + // test_case(0.1, 4.0, 0.0, skewness); + // test_case(1.0, 10.0, 0.0, skewness); + // test_case(10.0, 11.0, 0.0, skewness); + // test_case(0.0, f64::INFINITY, 0.0, skewness); + // } + + #[test] + fn test_mode() { + let mode = |x: MultivariateUniform| x.mode().unwrap(); + test_case(vec![0., 0.,], vec![1., 1.,], dvec![0.5, 0.5], mode); + test_case(vec![-1., 0.], vec![1., 2.], dvec![0., 1.], mode); + test_case(vec![0., 0., f64::NEG_INFINITY], vec![1., 1., 0.], dvec![0.5, 0.5, f64::NEG_INFINITY], mode); + test_case(vec![0.], vec![f64::INFINITY], dvec![f64::INFINITY], mode); + } + + // #[test] + // fn test_median() { + // let median = |x: MultivariateUniform| x.median(); + // test_case(-0.0, 2.0, 1.0, median); + // test_case(0.0, 2.0, 1.0, median); + // test_case(0.1, 4.0, 2.05, median); + // test_case(1.0, 10.0, 5.5, median); + // test_case(10.0, 11.0, 10.5, median); + // test_case(0.0, f64::INFINITY, f64::INFINITY, median); + // } + + // #[test] + // fn test_pdf() { + // let pdf = |arg: f64| move |x: MultivariateUniform| x.pdf(arg); + // test_case(0.0, 0.0, 0.0, pdf(-5.0)); + // test_case(0.0, 0.0, f64::INFINITY, pdf(0.0)); + // test_case(0.0, 0.0, 0.0, pdf(5.0)); + // test_case(0.0, 0.1, 0.0, pdf(-5.0)); + // test_case(0.0, 0.1, 10.0, pdf(0.05)); + // test_case(0.0, 0.1, 0.0, pdf(5.0)); + // test_case(0.0, 1.0, 0.0, pdf(-5.0)); + // test_case(0.0, 1.0, 1.0, pdf(0.5)); + // test_case(0.0, 0.1, 0.0, pdf(5.0)); + // test_case(0.0, 10.0, 0.0, pdf(-5.0)); + // test_case(0.0, 10.0, 0.1, pdf(1.0)); + // test_case(0.0, 10.0, 0.1, pdf(5.0)); + // test_case(0.0, 10.0, 0.0, pdf(11.0)); + // test_case(-5.0, 100.0, 0.0, pdf(-10.0)); + // test_case(-5.0, 100.0, 0.009523809523809523809524, pdf(-5.0)); + // test_case(-5.0, 100.0, 0.009523809523809523809524, pdf(0.0)); + // test_case(-5.0, 100.0, 0.0, pdf(101.0)); + // test_case(0.0, f64::INFINITY, 0.0, pdf(-5.0)); + // test_case(0.0, f64::INFINITY, 0.0, pdf(10.0)); + // test_case(0.0, f64::INFINITY, 0.0, pdf(f64::INFINITY)); + // } + + // #[test] + // fn test_ln_pdf() { + // let ln_pdf = |arg: f64| move |x: MultivariateUniform| x.ln_pdf(arg); + // test_case(0.0, 0.0, f64::NEG_INFINITY, ln_pdf(-5.0)); + // test_case(0.0, 0.0, f64::INFINITY, ln_pdf(0.0)); + // test_case(0.0, 0.0, f64::NEG_INFINITY, ln_pdf(5.0)); + // test_case(0.0, 0.1, f64::NEG_INFINITY, ln_pdf(-5.0)); + // test_almost(0.0, 0.1, 2.302585092994045684018, 1e-15, ln_pdf(0.05)); + // test_case(0.0, 0.1, f64::NEG_INFINITY, ln_pdf(5.0)); + // test_case(0.0, 1.0, f64::NEG_INFINITY, ln_pdf(-5.0)); + // test_case(0.0, 1.0, 0.0, ln_pdf(0.5)); + // test_case(0.0, 0.1, f64::NEG_INFINITY, ln_pdf(5.0)); + // test_case(0.0, 10.0, f64::NEG_INFINITY, ln_pdf(-5.0)); + // test_case(0.0, 10.0, -2.302585092994045684018, ln_pdf(1.0)); + // test_case(0.0, 10.0, -2.302585092994045684018, ln_pdf(5.0)); + // test_case(0.0, 10.0, f64::NEG_INFINITY, ln_pdf(11.0)); + // test_case(-5.0, 100.0, f64::NEG_INFINITY, ln_pdf(-10.0)); + // test_case(-5.0, 100.0, -4.653960350157523371101, ln_pdf(-5.0)); + // test_case(-5.0, 100.0, -4.653960350157523371101, ln_pdf(0.0)); + // test_case(-5.0, 100.0, f64::NEG_INFINITY, ln_pdf(101.0)); + // test_case(0.0, f64::INFINITY, f64::NEG_INFINITY, ln_pdf(-5.0)); + // test_case(0.0, f64::INFINITY, f64::NEG_INFINITY, ln_pdf(10.0)); + // test_case(0.0, f64::INFINITY, f64::NEG_INFINITY, ln_pdf(f64::INFINITY)); + // } + + // #[test] + // fn test_cdf() { + // let cdf = |arg: f64| move |x: MultivariateUniform| x.cdf(arg); + // test_case(0.0, 0.0, 0.0, cdf(0.0)); + // test_case(0.0, 0.1, 0.5, cdf(0.05)); + // test_case(0.0, 1.0, 0.5, cdf(0.5)); + // test_case(0.0, 10.0, 0.1, cdf(1.0)); + // test_case(0.0, 10.0, 0.5, cdf(5.0)); + // test_case(-5.0, 100.0, 0.0, cdf(-5.0)); + // test_case(-5.0, 100.0, 0.04761904761904761904762, cdf(0.0)); + // test_case(0.0, f64::INFINITY, 0.0, cdf(10.0)); + // test_case(0.0, f64::INFINITY, 1.0, cdf(f64::INFINITY)); + // } + + // #[test] + // fn test_cdf_lower_bound() { + // let cdf = |arg: f64| move |x: MultivariateUniform| x.cdf(arg); + // test_case(0.0, 3.0, 0.0, cdf(-1.0)); + // } + + // #[test] + // fn test_cdf_upper_bound() { + // let cdf = |arg: f64| move |x: MultivariateUniform| x.cdf(arg); + // test_case(0.0, 3.0, 1.0, cdf(5.0)); + // } + + + // #[test] + // fn test_sf() { + // let sf = |arg: f64| move |x: MultivariateUniform| x.sf(arg); + // test_case(0.0, 0.0, 1.0, sf(0.0)); + // test_case(0.0, 0.1, 0.5, sf(0.05)); + // test_case(0.0, 1.0, 0.5, sf(0.5)); + // test_case(0.0, 10.0, 0.9, sf(1.0)); + // test_case(0.0, 10.0, 0.5, sf(5.0)); + // test_case(-5.0, 100.0, 1.0, sf(-5.0)); + // test_case(-5.0, 100.0, 0.9523809523809523, sf(0.0)); + // test_case(0.0, f64::INFINITY, 1.0, sf(10.0)); + // test_case(0.0, f64::INFINITY, 0.0, sf(f64::INFINITY)); + // } + + // #[test] + // fn test_sf_lower_bound() { + // let sf = |arg: f64| move |x: MultivariateUniform| x.sf(arg); + // test_case(0.0, 3.0, 1.0, sf(-1.0)); + // } + + // #[test] + // fn test_sf_upper_bound() { + // let sf = |arg: f64| move |x: MultivariateUniform| x.sf(arg); + // test_case(0.0, 3.0, 0.0, sf(5.0)); + // } + + // #[test] + // fn test_continuous() { + // test::check_continuous_distribution(&try_create(0.0, 10.0), 0.0, 10.0); + // test::check_continuous_distribution(&try_create(-2.0, 15.0), -2.0, 15.0); + // } + + // #[test] + // fn test_samples_in_range() { + // use rand::rngs::StdRng; + // use rand::SeedableRng; + // use rand::distributions::Distribution; + + // let seed = [ + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + // 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + // ]; + // let mut r: StdRng = SeedableRng::from_seed(seed); + + // let min = -0.5; + // let max = 0.5; + // let num_trials = 10_000; + // let n = try_create(min, max); + + // assert!((0..num_trials) + // .map(|_| n.sample::(&mut r)) + // .all(|v| (min <= v) && (v < max)) + // ); + // } +} From 8dc5308c9727e98688ffb6804cd0bde227d453bf Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Thu, 29 Dec 2022 16:54:38 +0100 Subject: [PATCH 04/12] tests: mtv uniform pdf, cdf, sf --- src/distribution/multivariate_uniform.rs | 371 ++++++++++------------- 1 file changed, 161 insertions(+), 210 deletions(-) diff --git a/src/distribution/multivariate_uniform.rs b/src/distribution/multivariate_uniform.rs index 0b2a148c..97921cba 100644 --- a/src/distribution/multivariate_uniform.rs +++ b/src/distribution/multivariate_uniform.rs @@ -15,8 +15,8 @@ use std::f64; /// use statrs::statistics::Distribution; /// /// let n = MultivariateUniform::new(vec![-1., 0.], vec![0., 1.]).unwrap(); -// /// assert_eq!(n.mean().unwrap(), 0.5); -// /// assert_eq!(n.pdf(0.5), 1.0); +/// assert_eq!(n.mean().unwrap(), DVector::from_vec(vec![-0.5, 0.5]); +/// assert_eq!(n.pdf(DVector::from_vec(vec![-0.5, 0.5])), 1.); /// ``` #[derive(Debug, Clone, PartialEq)] pub struct MultivariateUniform { @@ -27,7 +27,7 @@ pub struct MultivariateUniform { } impl MultivariateUniform { - /// Constructs a new uniform distribution with a min in each dimension + /// Constructs a new uniform distribution with a min in each dimension /// of `min` and a max in each dimension of `max` /// /// # Errors @@ -51,9 +51,9 @@ impl MultivariateUniform { let min = DVector::from_vec(min); let max = DVector::from_vec(max); if min.iter().any(|f| f.is_nan()) - || max.iter().any(|f| f.is_nan()) + || max.iter().any(|f| f.is_nan()) || min.len() != max.len() - || min.iter().zip(max.iter()).any(|(f,g)| f > g) + || min.iter().zip(max.iter()).any(|(f, g)| f > g) { Err(StatsError::BadParams) } else { @@ -67,15 +67,45 @@ impl MultivariateUniform { sample_limits = None; } else { for i in 0..dim { - sample_limits_unchecked[i] = Some(RandUniform::new_inclusive(min[i],max[i])) + sample_limits_unchecked[i] = Some(RandUniform::new_inclusive(min[i], max[i])) } sample_limits = Some(DVector::from_vec( - sample_limits_unchecked.into_iter().flatten().collect::>>() + sample_limits_unchecked + .into_iter() + .flatten() + .collect::>>(), )); } - Ok(MultivariateUniform { dim, min, max, sample_limits}) + Ok(MultivariateUniform { + dim, + min, + max, + sample_limits, + }) } } + + /// Returns a uniform distribution on the unit hypercube [0,1]^dim + pub fn standard(dim: usize) -> Result { + let mut sample_limits: Vec>> = vec![None; dim]; + let min = DVector::from_vec(vec![0.; dim]); + let max = DVector::from_vec(vec![1.; dim]); + for i in 0..dim { + sample_limits[i] = Some(RandUniform::new_inclusive(0., 1.)) + } + let sample_limits = Some(DVector::from_vec( + sample_limits + .into_iter() + .flatten() + .collect::>>(), + )); + Ok(MultivariateUniform { + dim, + min, + max, + sample_limits, + }) + } } /// Returns an Option of sampled point in `self.dim` dimensions if the boundaries @@ -89,8 +119,8 @@ impl ::rand::distributions::Distribution>> for MultivariateU samples[i] = rng.sample(sample_limits[i]); } Some(samples) - }, - None => None + } + None => None, } } } @@ -106,13 +136,18 @@ impl ContinuousMultivariateCDF for MultivariateUniform { /// (x - min) / (max - min) /// ``` fn cdf(&self, x: DVector) -> f64 { - if x <= self.min { - 0. - } else if x >= self.max { - 1. - } else { - (x - &self.min).iter().product::() / self.min.iter().zip(self.max.iter()).map(|(f,g)| g - f).product::() + let mut p = 1.; + for i in 0..self.dim { + if x[i] <= self.min[i] + || (self.max[i].is_infinite() || self.min[i].is_infinite()) && x[i] < self.max[i] + { + p = 0.; + break; + } else if x[i] < self.max[i] { + p *= (x[i] - self.min[i]) / (self.max[i] - self.min[i]) + } } + return p; } /// Calculates the survival function for the uniform @@ -124,13 +159,18 @@ impl ContinuousMultivariateCDF for MultivariateUniform { /// (max - x) / (max - min) /// ``` fn sf(&self, x: DVector) -> f64 { - if x <= self.min { - 1.0 - } else if x >= self.max { - 0.0 - } else { - (&self.max - x).iter().product::() / self.min.iter().zip(self.max.iter()).map(|(f,g)| g - f).product::() + let mut p = 1.; + for i in 0..self.dim { + if x[i] >= self.max[i] + || (self.max[i].is_infinite() || self.min[i].is_infinite()) && x[i] > self.min[i] + { + p = 0.; + break; + } else if x[i] > self.min[i] { + p *= (self.max[i] - x[i]) / (self.max[i] - self.min[i]) + } } + return p; } } @@ -153,13 +193,6 @@ impl MeanN> for MultivariateUniform { } } -// impl VarianceN> for MultivariateUniform { -// /// Returns the covariance matrix of the multivariate uniform distribution -// fn variance(&self) -> Option> { -// Some(...) -// } -// } - impl Median> for MultivariateUniform { /// Returns the median for the continuous uniform distribution /// @@ -205,10 +238,18 @@ impl<'a> Continuous<&'a DVector, f64> for MultivariateUniform { /// 1 / ∏(max - min) /// ``` fn pdf(&self, x: &'a DVector) -> f64 { - if x < &self.min || x > &self.max { + if x.iter() + .zip(self.min.iter().zip(self.max.iter())) + .any(|(f, (g, h))| f < g || f > h) + { 0.0 } else { - 1. / self.min.iter().zip(self.max.iter()).map(|(f,g)| g - f).product::() + 1. / self + .min + .iter() + .zip(self.max.iter()) + .map(|(f, g)| g - f) + .product::() } } @@ -223,24 +264,35 @@ impl<'a> Continuous<&'a DVector, f64> for MultivariateUniform { /// # Formula /// /// ```ignore - /// ln(1 / ∏(max - min)) = -ln(∑(max -min)) + /// ln(1 / ∏(max - min)) = -∑ln(max -min)) /// ``` fn ln_pdf(&self, x: &'a DVector) -> f64 { - if x < &self.min || x > &self.max { + if x.iter() + .zip(self.min.iter().zip(self.max.iter())) + .any(|(f, (g, h))| f < g || f > h) + { f64::NEG_INFINITY } else { - -self.min.iter().zip(self.max.iter()).map(|(f,g)| g - f).sum::().ln() + -self + .min + .iter() + .zip(self.max.iter()) + .map(|(f, g)| (g - f).ln()) + .sum::() } } } #[rustfmt::skip] -// #[cfg(all(test, feature = "nightly"))] +#[cfg(all(test, feature = "nightly"))] mod tests { use crate::statistics::*; - use crate::distribution::{ContinuousCDF, Continuous, MultivariateUniform}; + use crate::distribution::{ + ContinuousCDF, Continuous, MultivariateUniform, ContinuousMultivariateCDF + }; use crate::distribution::internal::*; use crate::consts::ACC; + use core::fmt::Debug; use nalgebra::DVector; fn try_create(min: Vec, max: Vec) -> MultivariateUniform { @@ -260,15 +312,17 @@ mod tests { assert!(n.is_err()); } - fn get_value(min: Vec, max: Vec, eval: F) -> DVector - where F: Fn(MultivariateUniform) -> DVector + fn get_value(min: Vec, max: Vec, eval: F) -> T + where F: FnOnce(MultivariateUniform) -> T { let n = try_create(min, max); eval(n) } - fn test_case(min: Vec, max: Vec, expected: DVector, eval: F) - where F: Fn(MultivariateUniform) -> DVector + fn test_case(min: Vec, max: Vec, expected: T, eval: F) + where + T: Debug + PartialEq, + F: FnOnce(MultivariateUniform) -> T { let x = get_value(min, max, eval); assert_eq!(expected, x); @@ -292,6 +346,8 @@ mod tests { create_case(vec![0.1, 0.], vec![0.2, 0.1,]); create_case(vec![-5., 5., 10.], vec![-4., 6., 11.,]); create_case(vec![0.], vec![1.]); + create_case(vec![0.; 100], vec![1.; 100]); + create_case(vec![1.; 5], vec![1.; 5]); } #[test] @@ -301,40 +357,9 @@ mod tests { bad_create_case(vec![0., 0.], vec![-1., 1.]); bad_create_case(vec![0., 0.,], vec![f64::NAN, 3.]); bad_create_case(vec![0.], vec![1., 2.]); + bad_create_case(vec![0.; 10], vec![-1.; 10]); } - // #[test] - // fn test_variance() { - // let variance = |x: MultivariateUniform| x.variance().unwrap(); - // test_case(-0.0, 2.0, 1.0 / 3.0, variance); - // test_case(0.0, 2.0, 1.0 / 3.0, variance); - // test_almost(0.1, 4.0, 1.2675, 1e-15, variance); - // test_case(10.0, 11.0, 1.0 / 12.0, variance); - // test_case(0.0, f64::INFINITY, f64::INFINITY, variance); - // } - - // #[test] - // fn test_entropy() { - // let entropy = |x: MultivariateUniform| x.entropy().unwrap(); - // test_case(-0.0, 2.0, 0.6931471805599453094172, entropy); - // test_case(0.0, 2.0, 0.6931471805599453094172, entropy); - // test_almost(0.1, 4.0, 1.360976553135600743431, 1e-15, entropy); - // test_case(1.0, 10.0, 2.19722457733621938279, entropy); - // test_case(10.0, 11.0, 0.0, entropy); - // test_case(0.0, f64::INFINITY, f64::INFINITY, entropy); - // } - - // #[test] - // fn test_skewness() { - // let skewness = |x: MultivariateUniform| x.skewness().unwrap(); - // test_case(-0.0, 2.0, 0.0, skewness); - // test_case(0.0, 2.0, 0.0, skewness); - // test_case(0.1, 4.0, 0.0, skewness); - // test_case(1.0, 10.0, 0.0, skewness); - // test_case(10.0, 11.0, 0.0, skewness); - // test_case(0.0, f64::INFINITY, 0.0, skewness); - // } - #[test] fn test_mode() { let mode = |x: MultivariateUniform| x.mode().unwrap(); @@ -344,146 +369,72 @@ mod tests { test_case(vec![0.], vec![f64::INFINITY], dvec![f64::INFINITY], mode); } - // #[test] - // fn test_median() { - // let median = |x: MultivariateUniform| x.median(); - // test_case(-0.0, 2.0, 1.0, median); - // test_case(0.0, 2.0, 1.0, median); - // test_case(0.1, 4.0, 2.05, median); - // test_case(1.0, 10.0, 5.5, median); - // test_case(10.0, 11.0, 10.5, median); - // test_case(0.0, f64::INFINITY, f64::INFINITY, median); - // } - - // #[test] - // fn test_pdf() { - // let pdf = |arg: f64| move |x: MultivariateUniform| x.pdf(arg); - // test_case(0.0, 0.0, 0.0, pdf(-5.0)); - // test_case(0.0, 0.0, f64::INFINITY, pdf(0.0)); - // test_case(0.0, 0.0, 0.0, pdf(5.0)); - // test_case(0.0, 0.1, 0.0, pdf(-5.0)); - // test_case(0.0, 0.1, 10.0, pdf(0.05)); - // test_case(0.0, 0.1, 0.0, pdf(5.0)); - // test_case(0.0, 1.0, 0.0, pdf(-5.0)); - // test_case(0.0, 1.0, 1.0, pdf(0.5)); - // test_case(0.0, 0.1, 0.0, pdf(5.0)); - // test_case(0.0, 10.0, 0.0, pdf(-5.0)); - // test_case(0.0, 10.0, 0.1, pdf(1.0)); - // test_case(0.0, 10.0, 0.1, pdf(5.0)); - // test_case(0.0, 10.0, 0.0, pdf(11.0)); - // test_case(-5.0, 100.0, 0.0, pdf(-10.0)); - // test_case(-5.0, 100.0, 0.009523809523809523809524, pdf(-5.0)); - // test_case(-5.0, 100.0, 0.009523809523809523809524, pdf(0.0)); - // test_case(-5.0, 100.0, 0.0, pdf(101.0)); - // test_case(0.0, f64::INFINITY, 0.0, pdf(-5.0)); - // test_case(0.0, f64::INFINITY, 0.0, pdf(10.0)); - // test_case(0.0, f64::INFINITY, 0.0, pdf(f64::INFINITY)); - // } - - // #[test] - // fn test_ln_pdf() { - // let ln_pdf = |arg: f64| move |x: MultivariateUniform| x.ln_pdf(arg); - // test_case(0.0, 0.0, f64::NEG_INFINITY, ln_pdf(-5.0)); - // test_case(0.0, 0.0, f64::INFINITY, ln_pdf(0.0)); - // test_case(0.0, 0.0, f64::NEG_INFINITY, ln_pdf(5.0)); - // test_case(0.0, 0.1, f64::NEG_INFINITY, ln_pdf(-5.0)); - // test_almost(0.0, 0.1, 2.302585092994045684018, 1e-15, ln_pdf(0.05)); - // test_case(0.0, 0.1, f64::NEG_INFINITY, ln_pdf(5.0)); - // test_case(0.0, 1.0, f64::NEG_INFINITY, ln_pdf(-5.0)); - // test_case(0.0, 1.0, 0.0, ln_pdf(0.5)); - // test_case(0.0, 0.1, f64::NEG_INFINITY, ln_pdf(5.0)); - // test_case(0.0, 10.0, f64::NEG_INFINITY, ln_pdf(-5.0)); - // test_case(0.0, 10.0, -2.302585092994045684018, ln_pdf(1.0)); - // test_case(0.0, 10.0, -2.302585092994045684018, ln_pdf(5.0)); - // test_case(0.0, 10.0, f64::NEG_INFINITY, ln_pdf(11.0)); - // test_case(-5.0, 100.0, f64::NEG_INFINITY, ln_pdf(-10.0)); - // test_case(-5.0, 100.0, -4.653960350157523371101, ln_pdf(-5.0)); - // test_case(-5.0, 100.0, -4.653960350157523371101, ln_pdf(0.0)); - // test_case(-5.0, 100.0, f64::NEG_INFINITY, ln_pdf(101.0)); - // test_case(0.0, f64::INFINITY, f64::NEG_INFINITY, ln_pdf(-5.0)); - // test_case(0.0, f64::INFINITY, f64::NEG_INFINITY, ln_pdf(10.0)); - // test_case(0.0, f64::INFINITY, f64::NEG_INFINITY, ln_pdf(f64::INFINITY)); - // } - - // #[test] - // fn test_cdf() { - // let cdf = |arg: f64| move |x: MultivariateUniform| x.cdf(arg); - // test_case(0.0, 0.0, 0.0, cdf(0.0)); - // test_case(0.0, 0.1, 0.5, cdf(0.05)); - // test_case(0.0, 1.0, 0.5, cdf(0.5)); - // test_case(0.0, 10.0, 0.1, cdf(1.0)); - // test_case(0.0, 10.0, 0.5, cdf(5.0)); - // test_case(-5.0, 100.0, 0.0, cdf(-5.0)); - // test_case(-5.0, 100.0, 0.04761904761904761904762, cdf(0.0)); - // test_case(0.0, f64::INFINITY, 0.0, cdf(10.0)); - // test_case(0.0, f64::INFINITY, 1.0, cdf(f64::INFINITY)); - // } - - // #[test] - // fn test_cdf_lower_bound() { - // let cdf = |arg: f64| move |x: MultivariateUniform| x.cdf(arg); - // test_case(0.0, 3.0, 0.0, cdf(-1.0)); - // } - - // #[test] - // fn test_cdf_upper_bound() { - // let cdf = |arg: f64| move |x: MultivariateUniform| x.cdf(arg); - // test_case(0.0, 3.0, 1.0, cdf(5.0)); - // } - - - // #[test] - // fn test_sf() { - // let sf = |arg: f64| move |x: MultivariateUniform| x.sf(arg); - // test_case(0.0, 0.0, 1.0, sf(0.0)); - // test_case(0.0, 0.1, 0.5, sf(0.05)); - // test_case(0.0, 1.0, 0.5, sf(0.5)); - // test_case(0.0, 10.0, 0.9, sf(1.0)); - // test_case(0.0, 10.0, 0.5, sf(5.0)); - // test_case(-5.0, 100.0, 1.0, sf(-5.0)); - // test_case(-5.0, 100.0, 0.9523809523809523, sf(0.0)); - // test_case(0.0, f64::INFINITY, 1.0, sf(10.0)); - // test_case(0.0, f64::INFINITY, 0.0, sf(f64::INFINITY)); - // } - - // #[test] - // fn test_sf_lower_bound() { - // let sf = |arg: f64| move |x: MultivariateUniform| x.sf(arg); - // test_case(0.0, 3.0, 1.0, sf(-1.0)); - // } - - // #[test] - // fn test_sf_upper_bound() { - // let sf = |arg: f64| move |x: MultivariateUniform| x.sf(arg); - // test_case(0.0, 3.0, 0.0, sf(5.0)); - // } - - // #[test] - // fn test_continuous() { - // test::check_continuous_distribution(&try_create(0.0, 10.0), 0.0, 10.0); - // test::check_continuous_distribution(&try_create(-2.0, 15.0), -2.0, 15.0); - // } + #[test] + fn test_median() { + let median = |x: MultivariateUniform| x.median(); + test_case(vec![-5., 5.], vec![0., 10.], dvec![-2.5, 7.5], median); + test_case(vec![10.0, 5.0], vec![11.0, 6.0], dvec![10.5, 5.5], median); + test_case(vec![0., 0., 0.,], vec![1., 2., 3.,], dvec![0.5, 1., 1.5], median); + test_case(vec![f64::NEG_INFINITY, f64::NEG_INFINITY], vec![0., 0.], dvec![f64::NEG_INFINITY, f64::NEG_INFINITY], median); + } - // #[test] - // fn test_samples_in_range() { - // use rand::rngs::StdRng; - // use rand::SeedableRng; - // use rand::distributions::Distribution; + #[test] + fn test_pdf() { + let pdf = |arg: Vec| move |x: MultivariateUniform| x.pdf(&DVector::from_vec(arg)); + test_case(vec![0., 0.,], vec![1., 1.,], 0.0, pdf(vec![-10., -10.])); + test_case(vec![0., 0.,], vec![1., 1.,], 0.0, pdf(vec![-10., 0.5])); + test_case(vec![0., 0.,], vec![1., 1.,], 0.0, pdf(vec![2., 0.5])); + test_case(vec![0., 0.,], vec![1., 1.,], 0.0, pdf(vec![2., 2.])); + test_case(vec![0., 0.,], vec![1., 1.,], 1., pdf(vec![0.5, 0.5])); + test_case(vec![0., 0.,], vec![1., 1.,], 1., pdf(vec![1., 0.8])); + test_case(vec![0., 0.,], vec![1., 1.,], 1., pdf(vec![1., 1.])); + test_case(vec![-5., -10.], vec![5., 10.], 0.005, pdf(vec![0., 0.])); + test_case(vec![0., f64::NEG_INFINITY], vec![1., 0.], 0., pdf(vec![0.5, -1.])); + test_case(vec![-1., -2., -4.], vec![1., 2., 4.,], 0.5*0.25*0.125, pdf(vec![0., 0., 0.])); + test_case(vec![0.], vec![f64::INFINITY], 0., pdf(vec![200.])) + } - // let seed = [ - // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - // 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 - // ]; - // let mut r: StdRng = SeedableRng::from_seed(seed); + #[test] + fn test_ln_pdf() { + let ln_pdf = |arg: Vec| move |x: MultivariateUniform| x.ln_pdf(&DVector::from_vec(arg)); + test_case(vec![0., 0.,], vec![1., 1.,], 0., ln_pdf(vec![1., 1.])); + test_case(vec![0., 0.,], vec![1., 1.,], 0., ln_pdf(vec![0., 0.])); + test_case(vec![0., 0.,], vec![1., 1.,], 0., ln_pdf(vec![0.5, 0.2])); + test_case(vec![-1., -1.], vec![1., 1.], f64::NEG_INFINITY, ln_pdf(vec![-2., -2.])); + test_case(vec![0.; 10], vec![1.; 10], 0., ln_pdf(vec![0.5; 10])); + test_case(vec![0.; 10], vec![f64::INFINITY; 10], f64::NEG_INFINITY, ln_pdf(vec![f64::NEG_INFINITY; 10])); + test_case(vec![0.; 10], vec![f64::INFINITY; 10], f64::NEG_INFINITY, ln_pdf(vec![f64::INFINITY; 10])); + } - // let min = -0.5; - // let max = 0.5; - // let num_trials = 10_000; - // let n = try_create(min, max); + #[test] + fn test_cdf() { + let cdf = |arg: Vec| move |x: MultivariateUniform| x.cdf(DVector::from_vec(arg)); + test_case(vec![0., 0.], vec![1., 1.], 0.25, cdf(vec![0.5, 0.5])); + test_case(vec![0., 0.], vec![1., 1.], 0.0, cdf(vec![-1., 0.5])); + test_case(vec![0., 0.], vec![1., 1.], 0.0, cdf(vec![0.5, -1.])); + test_case(vec![0., 0.], vec![1., 1.], 1., cdf(vec![1., 1.])); + test_case(vec![0., 0.], vec![1., 1.], 1., cdf(vec![2., 2.])); + test_case(vec![0.; 100], vec![1.; 100], 0.5_f64.powi(100), cdf(vec![0.5; 100])); + test_case(vec![0.; 100], vec![1.; 100], 1., cdf(vec![1.5; 100])); + test_case(vec![0.; 5], vec![1.; 5], 0., cdf(vec![1., 1., 1., 1., -1.])); + test_case(vec![0.; 5], vec![1.; 5], 0.5, cdf(vec![1., 1., 1., 1., 0.5])); + test_case(vec![f64::NEG_INFINITY, 0.], vec![0., f64::INFINITY], 0., cdf(vec![-1., 1.])); + test_case(vec![f64::NEG_INFINITY, 0.], vec![0., f64::INFINITY], 1., cdf(vec![0., f64::INFINITY])); + } - // assert!((0..num_trials) - // .map(|_| n.sample::(&mut r)) - // .all(|v| (min <= v) && (v < max)) - // ); - // } + #[test] + fn test_sf() { + let sf = |arg: Vec| move |x: MultivariateUniform| x.sf(DVector::from_vec(arg)); + test_case(vec![0., 0.], vec![1., 1.], 0.25, sf(vec![0.5, 0.5])); + test_case(vec![0., 0.], vec![1., 1.], 0.5, sf(vec![-1., 0.5])); + test_case(vec![0., 0.], vec![1., 1.], 0.5, sf(vec![0.5, -1.])); + test_case(vec![0., 0.], vec![1., 1.], 0., sf(vec![1., 1.])); + test_case(vec![0., 0.], vec![1., 1.], 0., sf(vec![2., 2.])); + test_case(vec![0.; 100], vec![1.; 100], 0.5_f64.powi(100), sf(vec![0.5; 100])); + test_case(vec![0.; 100], vec![1.; 100], 0., sf(vec![1.5; 100])); + test_case(vec![0.; 5], vec![1.; 5], 0., sf(vec![1., 1., 1., 1., -1.])); + test_case(vec![0.; 5], vec![1.; 5], 0.5, sf(vec![0., 0., 0., 0., 0.5])); + test_case(vec![f64::NEG_INFINITY, 0.], vec![0., f64::INFINITY], 0., sf(vec![-1., 1.])); + test_case(vec![f64::NEG_INFINITY, 0.], vec![0., f64::INFINITY], 1., sf(vec![f64::NEG_INFINITY, 0.])); + } } From a7c1e367dc1f540f74b733aaf0a114e6ed30bca0 Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Thu, 29 Dec 2022 18:44:29 +0100 Subject: [PATCH 05/12] fix: set den = sqrt(cov_jj) --- src/distribution/multivariate_normal.rs | 39 ++++++++++++++----------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/distribution/multivariate_normal.rs b/src/distribution/multivariate_normal.rs index b8d38b7b..a8b0cf6b 100644 --- a/src/distribution/multivariate_normal.rs +++ b/src/distribution/multivariate_normal.rs @@ -121,20 +121,22 @@ impl MultivariateNormal { // Find the index of which to switch rows with for j in i..self.dim { let mut num = 0.; - let mut den = 0.; + let mut den = cov[(j,j)].sqrt(); if i > 0 { // Numerator: // -Σ lᵢₘyₘ, sum from m=1 to i-1 - num = (chol_lower.index((j,..i)) * y.index((..i,0)))[0]; + num = (chol_lower.index((j, ..i)) * y.index((..i, 0)))[0]; // Denominator: // √(σᵢᵢ - Σ lᵢₘ²), sum from m=1 to i-1 - den = (cov[(j,j)] - (chol_lower.index((j,..i)).transpose() * chol_lower.index((j,..i)))[0]).sqrt(); - } + den = (cov[(j, j)] + - (chol_lower.index((j, ..i)).transpose() * chol_lower.index((j, ..i)))[0]) + .sqrt(); + } let pot_a_tilde = (a[j] - num) / den; let pot_b_tilde = (b[j] - num) / den; let cdf_a = std_normal.cdf(pot_a_tilde); let cdf_b = std_normal.cdf(pot_b_tilde); - + let pot_cdf_diff = cdf_b - cdf_a; // Potential minimum if pot_cdf_diff < cdf_diff { new_i = j; @@ -144,23 +146,26 @@ impl MultivariateNormal { } } if i != new_i { - cov.swap_rows(i,new_i); - cov.swap_columns(i,new_i); - a.swap_rows(i,new_i); - b.swap_rows(i,new_i); - chol_lower.swap_rows(i,new_i); - chol_lower.swap_columns(i,new_i); + cov.swap_rows(i, new_i); + cov.swap_columns(i, new_i); + a.swap_rows(i, new_i); + b.swap_rows(i, new_i); + chol_lower.swap_rows(i, new_i); + chol_lower.swap_columns(i, new_i); } // Get the expected values: - // yᵢ = 1 / (Ψ(bᵢ) - Ψ(𝑎ᵢ)) * ∫_𝑎ᵢ^𝑏ᵢ sψ(s) ds - y[i] = ((-a_tilde.powi(2)/2.).exp() - (-b_tilde.powi(2)/2.).exp()) / (cdf_diff); + // yᵢ = 1 / (Ψ(bᵢ) - Ψ(𝑎ᵢ)) * ∫_𝑎ᵢ^𝑏ᵢ sψ(s) ds + y[i] = ((-a_tilde.powi(2) / 2.).exp() - (-b_tilde.powi(2) / 2.).exp()) / ((2. * PI).sqrt() * cdf_diff); // Cholesky decomposition algorithm with the new changed row - let mut ids = chol_lower.index_mut((..,..i+1)); // Get only the relevant indices - ids[(i,i)] = (cov[(i,i)] - (ids.index((i,..i)) * ids.index((i,..i)).transpose())[0]).sqrt(); - for j in i+1..self.dim { - ids[(j,i)] = (cov[(j,i)] - (ids.index((i,..i)) * ids.index((j,..i)).transpose())[0]) / ids[(i,i)]; + let mut ids = chol_lower.index_mut((.., ..i + 1)); // Get only the relevant indices + ids[(i, i)] = + (cov[(i, i)] - (ids.index((i, ..i)) * ids.index((i, ..i)).transpose())[0]).sqrt(); + for j in i + 1..self.dim { + ids[(j, i)] = (cov[(j, i)] + - (ids.index((i, ..i)) * ids.index((j, ..i)).transpose())[0]) + / ids[(i, i)]; } } chol_lower From 96a5e58883afc7d5be191eeca39ac6e62cca61f9 Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Thu, 29 Dec 2022 19:01:13 +0100 Subject: [PATCH 06/12] feat: Multivariate normal cdf using quasi-MC --- src/distribution/multivariate_normal.rs | 53 ++++++++++++++++++++----- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/src/distribution/multivariate_normal.rs b/src/distribution/multivariate_normal.rs index a8b0cf6b..8066a873 100644 --- a/src/distribution/multivariate_normal.rs +++ b/src/distribution/multivariate_normal.rs @@ -1,5 +1,5 @@ use crate::distribution::{ - Continuous, ContinuousCDF, ContinuousMultivariateCDF, Normal + Continuous, ContinuousCDF, ContinuousMultivariateCDF, MultivariateUniform, Normal, }; use crate::statistics::{Max, MeanN, Min, Mode, VarianceN}; use crate::{Result, StatsError}; @@ -8,8 +8,9 @@ use nalgebra::{ LU, U1, }; use nalgebra::{DMatrix, DVector}; +use primes::{PrimeSet, Sieve}; +use rand::distributions::Distribution; use rand::Rng; -use primes::{Sieve, PrimeSet}; use std::f64; use std::f64::consts::{E, PI}; @@ -121,7 +122,7 @@ impl MultivariateNormal { // Find the index of which to switch rows with for j in i..self.dim { let mut num = 0.; - let mut den = cov[(j,j)].sqrt(); + let mut den = cov[(j, j)].sqrt(); if i > 0 { // Numerator: // -Σ lᵢₘyₘ, sum from m=1 to i-1 @@ -156,7 +157,8 @@ impl MultivariateNormal { // Get the expected values: // yᵢ = 1 / (Ψ(bᵢ) - Ψ(𝑎ᵢ)) * ∫_𝑎ᵢ^𝑏ᵢ sψ(s) ds - y[i] = ((-a_tilde.powi(2) / 2.).exp() - (-b_tilde.powi(2) / 2.).exp()) / ((2. * PI).sqrt() * cdf_diff); + y[i] = ((-a_tilde.powi(2) / 2.).exp() - (-b_tilde.powi(2) / 2.).exp()) + / ((2. * PI).sqrt() * cdf_diff); // Cholesky decomposition algorithm with the new changed row let mut ids = chol_lower.index_mut((.., ..i + 1)); // Get only the relevant indices @@ -199,20 +201,51 @@ impl ContinuousMultivariateCDF for MultivariateNormal { // Shift integration limit wrt. mean x -= &self.mu; - let chol_lower = self.chol_chrows(&mut x, &mut DVector::zeros(self.dim)); + let chol_lower = self.chol_chrows( + &mut DVector::from_vec(vec![f64::NEG_INFINITY; self.dim]), + &mut x, + ); // Generate first `dim` primes, Ricthmyer generators // Could write function in crate for this instead if we - // want less imports. Efficiency in this part of the code does not - // matter much - let mut sqrt_primes = vec![0.; self.dim]; + // want less imports. Efficiency here does not matter much + let mut sqrt_primes = DVector::zeros(self.dim); let mut pset = Sieve::new(); for (i, n) in pset.iter().enumerate().take(self.dim) { sqrt_primes[i] = (n as f64).sqrt(); } - // let n_samples = 1000 * self.dim; - return 0. + let n_samples = 15; + let n_points = 1000 * self.dim; + let mvu = MultivariateUniform::standard(self.dim).unwrap(); + let std_normal = Normal::new(0., 1.).unwrap(); + let mut rng = rand::thread_rng(); + let mut p = 0.; // The cdf probability + for i in 0..n_samples { + let rnd_points = mvu.sample(&mut rng).unwrap(); + let mut sum_i = 0.; + for j in 0..n_points { + let w = + (2. * DVector::from_vec( + ((j as f64) * &sqrt_primes + &rnd_points) + .iter() + .map(|x| x % 1.) + .collect::>(), + ) - DVector::from_vec(vec![1.; self.dim])) + .abs(); + let mut y: DVector = DVector::zeros(self.dim - 1); + let mut fi = std_normal.cdf(x[0] / chol_lower[(0, 0)]); + for m in 1..self.dim { + y[m - 1] = std_normal.inverse_cdf(w[m - 1] * fi); + let cdf_arg = (x[m] - (chol_lower.index((m, ..m)) * y.index((..m, 0)))[0]) + / chol_lower[(m, m)]; + fi *= std_normal.cdf(cdf_arg); + } + sum_i += (fi - sum_i) / ((j + 1) as f64); + } + p += (sum_i - p) / ((i + 1) as f64); + } + return p; } /// Returns the survival function at `x` for the From 12ba8043c04ce1310b09b7508d21454d9fa82526 Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Thu, 29 Dec 2022 21:34:02 +0100 Subject: [PATCH 07/12] tests: cdf for multivariate normal --- src/distribution/multivariate_normal.rs | 40 +++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/src/distribution/multivariate_normal.rs b/src/distribution/multivariate_normal.rs index 8066a873..3859c0e8 100644 --- a/src/distribution/multivariate_normal.rs +++ b/src/distribution/multivariate_normal.rs @@ -237,8 +237,12 @@ impl ContinuousMultivariateCDF for MultivariateNormal { let mut fi = std_normal.cdf(x[0] / chol_lower[(0, 0)]); for m in 1..self.dim { y[m - 1] = std_normal.inverse_cdf(w[m - 1] * fi); - let cdf_arg = (x[m] - (chol_lower.index((m, ..m)) * y.index((..m, 0)))[0]) + let mut cdf_arg = (x[m] - (chol_lower.index((m, ..m)) * y.index((..m, 0)))[0]) / chol_lower[(m, m)]; + if cdf_arg.is_nan() { + // Either -inf, 0 or inf + cdf_arg = f64::INFINITY; + } fi *= std_normal.cdf(cdf_arg); } sum_i += (fi - sum_i) / ((j + 1) as f64); @@ -342,7 +346,7 @@ impl<'a> Continuous<&'a DVector, f64> for MultivariateNormal { } #[rustfmt::skip] -#[cfg(all(test, feature = "nightly"))] +// #[cfg(all(test, feature = "nightly"))] mod tests { use crate::distribution::{Continuous, MultivariateNormal}; use crate::statistics::*; @@ -398,10 +402,26 @@ mod tests { assert_almost_eq!(expected, x, acc); } + fn identity_vec(dim: usize) -> Vec { + let id: DMatrix = DMatrix::identity(dim,dim); + return id.data.into(); + } + + fn cov_matrix(dim: usize, eye_factor: f64, off_diag_factor: f64) -> Vec { + let id: DMatrix = eye_factor * DMatrix::identity(dim,dim); + let rho = DMatrix::from_vec(dim,dim,vec![off_diag_factor; dim*dim]); + return (id - DMatrix::identity(dim,dim)*off_diag_factor + rho).data.into() + } + use super::*; macro_rules! dvec { - ($($x:expr),*) => (DVector::from_vec(vec![$($x),*])); + ($($x:expr),*) => { + DVector::from_vec(vec![$($x),*]) + }; + ($($x:expr)?;$($y:expr)?) => { + DVector::from_vec(vec![$($x)?;$($y)?]) + }; } macro_rules! mat2 { @@ -492,4 +512,18 @@ mod tests { test_case(vec![0., 0.], vec![f64::INFINITY, 0., 0., f64::INFINITY], f64::NEG_INFINITY, ln_pdf(dvec![10., 10.])); test_case(vec![0., 0.], vec![f64::INFINITY, 0., 0., f64::INFINITY], f64::NEG_INFINITY, ln_pdf(dvec![100., 100.])); } + + #[test] + fn test_cdf() { + let cdf = |arg: DVector<_>| move |x: MultivariateNormal| x.cdf(arg); + test_case(vec![0., 0., 0.], identity_vec(3), 0., cdf(dvec![f64::NEG_INFINITY; 3])); + test_case(vec![0., 0., 0.], identity_vec(3), 1., cdf(dvec![f64::INFINITY; 3])); + test_case(vec![1., -1., 10.], identity_vec(3), 1., cdf(dvec![f64::INFINITY; 3])); + test_almost(vec![-5., 0., 5.], cov_matrix(3, 1., 0.5), 0.23397186, 1e-5, cdf(dvec![-2., 1., 4.3])); + test_almost(vec![1., 0., 2.], cov_matrix(3, 1., 0.9), 0.0663303, 1e-5, cdf(dvec![0.5; 3])); + test_almost(vec![-0.5, 1.1], cov_matrix(2, 1., 0.2), 0.700540224, 1e-5, cdf(dvec![0.5, 2.])); + // These fail... whilst the ones above don't + test_almost(vec![10., 10., 10.], cov_matrix(3, 5., 1.5), 0.5945585970, 1e-5, cdf(dvec![12.; 3])); + test_almost(vec![1.; 15], cov_matrix(15, 2., 0.5), 0.121248186, 1e-5, cdf(dvec![1.; 15])); + } } From c1381cacdbcbfcfe193762dfd3a4f7a5c78a1a8d Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Fri, 30 Dec 2022 11:43:52 +0100 Subject: [PATCH 08/12] feat: integrate pdf from limits a -> b --- src/distribution/multivariate_normal.rs | 105 ++++++++++++++---------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/src/distribution/multivariate_normal.rs b/src/distribution/multivariate_normal.rs index 3859c0e8..5681b6eb 100644 --- a/src/distribution/multivariate_normal.rs +++ b/src/distribution/multivariate_normal.rs @@ -108,7 +108,7 @@ impl MultivariateNormal { /// /// Algorithm explained in 4.1.3 in ´Computation of Multivariate /// Normal and t Probabilities´, Alan Genz. - pub fn chol_chrows(&self, a: &mut DVector, b: &mut DVector) -> DMatrix { + fn chol_chrows(&self, a: &mut DVector, b: &mut DVector) -> DMatrix { let mut cov = self.cov.clone(); let mut chol_lower: DMatrix = DMatrix::zeros(self.dim, self.dim); let mut y: DVector = DVector::zeros(self.dim); @@ -172,39 +172,12 @@ impl MultivariateNormal { } chol_lower } -} -impl ::rand::distributions::Distribution> for MultivariateNormal { - /// Samples from the multivariate normal distribution - /// - /// # Formula - /// L * Z + μ - /// - /// where `L` is the Cholesky decomposition of the covariance matrix, - /// `Z` is a vector of normally distributed random variables, and - /// `μ` is the mean vector - - fn sample(&self, rng: &mut R) -> DVector { - let d = Normal::new(0., 1.).unwrap(); - let z = DVector::::from_distribution(self.dim, &d, rng); - (&self.cov_chol_decomp * z) + &self.mu - } -} - -impl ContinuousMultivariateCDF for MultivariateNormal { - /// Returns the cumulative distribution function at `x` for the - /// multivariate normal distribution, using approximation with - /// `N` points. Uses the algorithm as explained in + /// Uses the algorithm as explained in /// 'Computation of Multivariate Normal and t Probabilites', Section 4.2.2, /// by Alan Genz. - fn cdf(&self, mut x: DVector) -> f64 { - // Shift integration limit wrt. mean - x -= &self.mu; - - let chol_lower = self.chol_chrows( - &mut DVector::from_vec(vec![f64::NEG_INFINITY; self.dim]), - &mut x, - ); + fn integrate_pdf(&self, a: &mut DVector, b: &mut DVector) -> (f64, f64) { + let chol_lower = self.chol_chrows(a, b); // Generate first `dim` primes, Ricthmyer generators // Could write function in crate for this instead if we @@ -220,6 +193,13 @@ impl ContinuousMultivariateCDF for MultivariateNormal { let mvu = MultivariateUniform::standard(self.dim).unwrap(); let std_normal = Normal::new(0., 1.).unwrap(); let mut rng = rand::thread_rng(); + + let one = DVector::from_vec(vec![1.; self.dim]); + let mut y: DVector = DVector::zeros(self.dim - 1); + + let alpha = 3.; + let mut err = 0.; + let mut err_help = 0.; let mut p = 0.; // The cdf probability for i in 0..n_samples { let rnd_points = mvu.sample(&mut rng).unwrap(); @@ -231,25 +211,62 @@ impl ContinuousMultivariateCDF for MultivariateNormal { .iter() .map(|x| x % 1.) .collect::>(), - ) - DVector::from_vec(vec![1.; self.dim])) - .abs(); - let mut y: DVector = DVector::zeros(self.dim - 1); - let mut fi = std_normal.cdf(x[0] / chol_lower[(0, 0)]); + ) - &one) + .abs(); + let mut di = std_normal.cdf(a[0] / chol_lower[(0, 0)]); + let mut ei = std_normal.cdf(b[0] / chol_lower[(0, 0)]); + let mut fi = ei - di; for m in 1..self.dim { - y[m - 1] = std_normal.inverse_cdf(w[m - 1] * fi); - let mut cdf_arg = (x[m] - (chol_lower.index((m, ..m)) * y.index((..m, 0)))[0]) - / chol_lower[(m, m)]; - if cdf_arg.is_nan() { - // Either -inf, 0 or inf - cdf_arg = f64::INFINITY; + y[m - 1] = std_normal.inverse_cdf(di + w[m - 1] * (ei - di)); + let mut num = (chol_lower.index((m, ..m)) * y.index((..m, 0)))[0]; + let den = chol_lower[(m, m)]; + if num.is_nan() { + // Either -inf, 0 or inf, comes when yᵢ = -inf and chol_lowerₘᵢ = 0 + num = 0.; } - fi *= std_normal.cdf(cdf_arg); + di = std_normal.cdf((a[m] - num) / den); + ei = std_normal.cdf((b[m] - num) / den); + fi *= ei - di; } sum_i += (fi - sum_i) / ((j + 1) as f64); } - p += (sum_i - p) / ((i + 1) as f64); + let delta = (sum_i - p) / ((i + 1) as f64); + p += delta; + err_help = (((i - 1) as f64) * err_help / ((i + 1) as f64)) + delta.powi(2); + err = alpha * err_help.sqrt() } - return p; + return (p, err); + } +} + +impl ::rand::distributions::Distribution> for MultivariateNormal { + /// Samples from the multivariate normal distribution + /// + /// # Formula + /// L * Z + μ + /// + /// where `L` is the Cholesky decomposition of the covariance matrix, + /// `Z` is a vector of normally distributed random variables, and + /// `μ` is the mean vector + + fn sample(&self, rng: &mut R) -> DVector { + let d = Normal::new(0., 1.).unwrap(); + let z = DVector::::from_distribution(self.dim, &d, rng); + (&self.cov_chol_decomp * z) + &self.mu + } +} + +impl ContinuousMultivariateCDF for MultivariateNormal { + /// Returns the cumulative distribution function at `x` for the + /// multivariate normal distribution + fn cdf(&self, mut x: DVector) -> f64 { + // Shift integration limit wrt. mean + x -= &self.mu; + let (p, _) = self.integrate_pdf( + &mut DVector::from_vec(vec![f64::NEG_INFINITY; self.dim]), + &mut x, + ); + p } /// Returns the survival function at `x` for the From fa171e01113eaad6afd8273f86c2b86304d1c4db Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Fri, 30 Dec 2022 12:24:54 +0100 Subject: [PATCH 09/12] tests: more cdf tests --- src/distribution/multivariate_normal.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/distribution/multivariate_normal.rs b/src/distribution/multivariate_normal.rs index 5681b6eb..cd5153b8 100644 --- a/src/distribution/multivariate_normal.rs +++ b/src/distribution/multivariate_normal.rs @@ -363,7 +363,7 @@ impl<'a> Continuous<&'a DVector, f64> for MultivariateNormal { } #[rustfmt::skip] -// #[cfg(all(test, feature = "nightly"))] +#[cfg(all(test, feature = "nightly"))] mod tests { use crate::distribution::{Continuous, MultivariateNormal}; use crate::statistics::*; @@ -424,9 +424,9 @@ mod tests { return id.data.into(); } - fn cov_matrix(dim: usize, eye_factor: f64, off_diag_factor: f64) -> Vec { - let id: DMatrix = eye_factor * DMatrix::identity(dim,dim); - let rho = DMatrix::from_vec(dim,dim,vec![off_diag_factor; dim*dim]); + fn cov_matrix(dim: usize, diag_factor: f64, off_diag_factor: f64) -> Vec { + let id = DMatrix::from_diagonal_element(dim,dim,diag_factor); + let rho = DMatrix::repeat(dim, dim, off_diag_factor); return (id - DMatrix::identity(dim,dim)*off_diag_factor + rho).data.into() } @@ -536,11 +536,14 @@ mod tests { test_case(vec![0., 0., 0.], identity_vec(3), 0., cdf(dvec![f64::NEG_INFINITY; 3])); test_case(vec![0., 0., 0.], identity_vec(3), 1., cdf(dvec![f64::INFINITY; 3])); test_case(vec![1., -1., 10.], identity_vec(3), 1., cdf(dvec![f64::INFINITY; 3])); + test_almost(vec![0., 0., 0.], cov_matrix(3, 1., 0.1), 0.119415222, 1e-5, cdf(dvec![-0.1; 3])); test_almost(vec![-5., 0., 5.], cov_matrix(3, 1., 0.5), 0.23397186, 1e-5, cdf(dvec![-2., 1., 4.3])); test_almost(vec![1., 0., 2.], cov_matrix(3, 1., 0.9), 0.0663303, 1e-5, cdf(dvec![0.5; 3])); test_almost(vec![-0.5, 1.1], cov_matrix(2, 1., 0.2), 0.700540224, 1e-5, cdf(dvec![0.5, 2.])); - // These fail... whilst the ones above don't test_almost(vec![10., 10., 10.], cov_matrix(3, 5., 1.5), 0.5945585970, 1e-5, cdf(dvec![12.; 3])); - test_almost(vec![1.; 15], cov_matrix(15, 2., 0.5), 0.121248186, 1e-5, cdf(dvec![1.; 15])); + test_almost(vec![1.; 4], cov_matrix(4, 2., 0.5), 0.1264796225, 1e-5, cdf(dvec![1.; 4])); + test_almost(vec![1.; 15], cov_matrix(15, 2., 0.5), 0.011545573, 1e-5, cdf(dvec![1.; 15])); + test_almost(vec![-100., -150., 150.], vec![1., 0.2, 0.9, 0.2, 1., 0.3, 0.9, 0.3, 1.], 0.999999713, 1e-5, cdf(dvec![-90., -140., 155.])); + test_almost(vec![0.5,0.2,1.1], vec![1., 0.2, 0.9, 0.2, 1., 0.3, 0.9, 0.3, 1.], 0.07532228836, 1e-5, cdf(dvec![-0.9, 1.3, 2.2])); } } From 17d9a8b64fe3c8de6b53b2eb55ef3ca63a35e745 Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Fri, 30 Dec 2022 12:38:03 +0100 Subject: [PATCH 10/12] fix: assert in docstring for multivariate normal cdf --- src/distribution/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distribution/mod.rs b/src/distribution/mod.rs index 94d85777..514132ff 100644 --- a/src/distribution/mod.rs +++ b/src/distribution/mod.rs @@ -292,7 +292,7 @@ pub trait ContinuousMultivariateCDF { /// use statrs::distribution::{ContinuousMultivariateCDF, MultivariateNormal}; /// /// let mvn = MultivariateNormal::new(vec![0., 0.], vec![1., 0., 0., 1.]).unwrap(); - /// assert_eq!(0.5, mvn.cdf([0., 0.,])); + /// assert_eq!(0.25, mvn.cdf([0., 0.,])); /// ``` fn cdf(&self, x: DVector) -> T; From ce14d58980f1233e0efa115954cc057716b4c000 Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Fri, 30 Dec 2022 12:44:06 +0100 Subject: [PATCH 11/12] fix: remove pub for cov_chol_decomp --- src/distribution/multivariate_normal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distribution/multivariate_normal.rs b/src/distribution/multivariate_normal.rs index cd5153b8..58a41c12 100644 --- a/src/distribution/multivariate_normal.rs +++ b/src/distribution/multivariate_normal.rs @@ -32,7 +32,7 @@ use std::f64::consts::{E, PI}; #[derive(Debug, Clone, PartialEq)] pub struct MultivariateNormal { dim: usize, - pub cov_chol_decomp: DMatrix, + cov_chol_decomp: DMatrix, mu: DVector, cov: DMatrix, precision: DMatrix, From 6b02c960fd161394284e6110a4fbb3c4222ac6d7 Mon Sep 17 00:00:00 2001 From: Henry Jacobson Date: Tue, 27 Feb 2024 23:17:30 +0100 Subject: [PATCH 12/12] fix: docstring tests --- src/distribution/mod.rs | 6 ++++-- src/distribution/multivariate_normal.rs | 1 + src/distribution/multivariate_uniform.rs | 14 ++++++++------ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/distribution/mod.rs b/src/distribution/mod.rs index 514132ff..e6a2fd92 100644 --- a/src/distribution/mod.rs +++ b/src/distribution/mod.rs @@ -290,9 +290,10 @@ pub trait ContinuousMultivariateCDF { /// /// ``` /// use statrs::distribution::{ContinuousMultivariateCDF, MultivariateNormal}; + /// use nalgebra::DVector; /// /// let mvn = MultivariateNormal::new(vec![0., 0.], vec![1., 0., 0., 1.]).unwrap(); - /// assert_eq!(0.25, mvn.cdf([0., 0.,])); + /// assert_eq!(0.25, mvn.cdf(DVector::from_vec(vec![0., 0.,]))); /// ``` fn cdf(&self, x: DVector) -> T; @@ -304,9 +305,10 @@ pub trait ContinuousMultivariateCDF { /// /// ``` /// use statrs::distribution::{ContinuousMultivariateCDF, MultivariateNormal}; + /// use nalgebra::DVector; /// /// let mvs = MultivariateNormal::new(vec![0., 0.], vec![1., 0., 0., 1.]).unwrap(); - /// assert_eq!(f64::NEG_INFINITY, mvs.sf([f64::INFINITY, f64::INFINITY])); + /// assert_eq!(0., mvs.sf(DVector::from_vec(vec![f64::INFINITY, f64::INFINITY]))); /// ``` fn sf(&self, x: DVector) -> T; } diff --git a/src/distribution/multivariate_normal.rs b/src/distribution/multivariate_normal.rs index 58a41c12..6cb2f52a 100644 --- a/src/distribution/multivariate_normal.rs +++ b/src/distribution/multivariate_normal.rs @@ -273,6 +273,7 @@ impl ContinuousMultivariateCDF for MultivariateNormal { /// multivariate normal distribution, using approximation with /// `N` points. fn sf(&self, x: DVector) -> f64 { + // Shift integration limit wrt. mean 1. - self.cdf(x) } } diff --git a/src/distribution/multivariate_uniform.rs b/src/distribution/multivariate_uniform.rs index 97921cba..c5946499 100644 --- a/src/distribution/multivariate_uniform.rs +++ b/src/distribution/multivariate_uniform.rs @@ -12,11 +12,12 @@ use std::f64; /// /// ``` /// use statrs::distribution::{MultivariateUniform, Continuous}; -/// use statrs::statistics::Distribution; +/// use statrs::statistics::{Distribution, MeanN}; +/// use nalgebra::DVector; /// /// let n = MultivariateUniform::new(vec![-1., 0.], vec![0., 1.]).unwrap(); -/// assert_eq!(n.mean().unwrap(), DVector::from_vec(vec![-0.5, 0.5]); -/// assert_eq!(n.pdf(DVector::from_vec(vec![-0.5, 0.5])), 1.); +/// assert_eq!(n.mean().unwrap(), DVector::from_vec(vec![-0.5, 0.5])); +/// assert_eq!(n.pdf(&DVector::from_vec(vec![-0.5, 0.5])), 1.); /// ``` #[derive(Debug, Clone, PartialEq)] pub struct MultivariateUniform { @@ -40,11 +41,12 @@ impl MultivariateUniform { /// use statrs::distribution::MultivariateUniform; /// use std::f64; /// - /// let mut result = Uniform::new(vec![-1., 0.], vec![0., 1.,]); + /// let mut result = MultivariateUniform::new(vec![-1., 0.], vec![0., 1.,]); /// assert!(result.is_ok()); /// - /// result = Uniform::new(f64::NAN, f64::NAN); - /// result = Uniform::new(vec![0., f64::NAN], vec![f64::NAN, 1.]); + /// result = MultivariateUniform::new(vec![f64::NAN], vec![f64::NAN]); + /// assert!(result.is_err()); + /// result = MultivariateUniform::new(vec![0., f64::NAN], vec![f64::NAN, 1.]); /// assert!(result.is_err()); /// ``` pub fn new(min: Vec, max: Vec) -> Result {