diff --git a/.gitignore b/.gitignore index a9d37c5..b53cdd6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target Cargo.lock +/src/btree_multiset.rs diff --git a/Cargo.toml b/Cargo.toml index 2e370ce..3e7b6f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,6 @@ description = "Multisets/bags" keywords = ["multiset","bag","data-structure","collection","count"] license = "MIT/Apache-2.0" authors = ["Jake Mitchell "] + +[dev-dependencies] +quickcheck = "0.9.1" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..9c1efb7 --- /dev/null +++ b/build.rs @@ -0,0 +1,14 @@ +fn main() { + let btree_multiset_code = std::fs::read_to_string("./src/hash_multiset.rs") + .expect("Could not open hash_multiset source file") + .replace("Hash + Eq", "Ord") + .replace("Eq + Hash", "Ord") + .replace("hash_map::", "btree_map::") + .replace("HashMap", "BTreeMap") + .replace("HashMultiSet", "BTreeMultiSet") + .replace("use std::hash::Hash;\n", "") + .replace("hash-based multiset", "tree-based multiset"); + std::fs::write("./src/btree_multiset.rs", btree_multiset_code.as_bytes()) + .expect("Could not write btree_multiset file"); + println!("cargo:rerun-if-changed=./src/hash_multiset.rs"); +} diff --git a/src/multiset.rs b/src/hash_multiset.rs similarity index 91% rename from src/multiset.rs rename to src/hash_multiset.rs index b93045d..e2e0e2a 100644 --- a/src/multiset.rs +++ b/src/hash_multiset.rs @@ -7,9 +7,10 @@ // except according to those terms. #![warn(missing_docs)] +use super::Iter; + use std::borrow::Borrow; -use std::collections::hash_map; -use std::collections::hash_map::{Entry, Keys}; +use std::collections::hash_map::{self, Entry, Keys}; use std::collections::HashMap; use std::fmt; use std::hash::Hash; @@ -26,36 +27,6 @@ where size: usize, } -/// An iterator over the items of a `HashMultiSet`. -/// -/// This `struct` is created by the [`iter`] method on [`HashMultiSet`]. -#[derive(Clone)] -pub struct Iter<'a, K: 'a> { - iter: hash_map::Iter<'a, K, usize>, - duplicate: Option<(&'a K, &'a usize)>, - duplicate_index: usize, -} - -impl<'a, K> Iterator for Iter<'a, K> { - type Item = &'a K; - - fn next(&mut self) -> Option<&'a K> { - if self.duplicate.is_none() { - self.duplicate = self.iter.next(); - } - if let Some((key, count)) = self.duplicate { - self.duplicate_index += 1; - if self.duplicate_index >= *count { - self.duplicate = None; - self.duplicate_index = 0; - } - Some(key) - } else { - None - } - } -} - impl HashMultiSet where K: Eq + Hash, @@ -94,12 +65,8 @@ where /// } /// assert_eq!(3, multiset.iter().count()); /// ``` - pub fn iter(&self) -> Iter { - Iter { - iter: self.elem_counts.iter(), - duplicate: None, - duplicate_index: 0, - } + pub fn iter(&self) -> Iter<&K, &usize, hash_map::Iter> { + Iter::new(self.elem_counts.iter(), self.size) } /// Returns true if the multiset contains no elements. @@ -120,10 +87,6 @@ where /// Returns `true` if the multiset contains a value. /// - /// The value may be any borrowed form of the set's value type, but - /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for - /// the value type. - /// /// # Examples /// /// ``` @@ -136,7 +99,7 @@ where pub fn contains(&self, value: &Q) -> bool where K: Borrow, - Q: Hash + Eq, + Q: Eq + Hash, { self.elem_counts.contains_key(value) } diff --git a/src/iter.rs b/src/iter.rs new file mode 100644 index 0000000..bf1d62b --- /dev/null +++ b/src/iter.rs @@ -0,0 +1,174 @@ +// Copyright 2019 multiset developers +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +#![warn(missing_docs)] + +use std::borrow::Borrow; +use std::marker::PhantomData; + +/// An iterator over the items of a `MultiSet`. +/// +/// This `struct` is created by the [`iter`](super::HashMultiSet::iter) method on +/// [`HashMultiSet`](super::HashMultiSet) or [`BTreeMultiSet`](super::BTreeMultiSet). +#[derive(Clone)] +pub struct Iter, InnerIter: Iterator> { + pub(crate) iter: InnerIter, + pub(crate) duplicate: Option<::Item>, + pub(crate) duplicate_index: usize, + pub(crate) duplicate_back: Option<::Item>, + pub(crate) duplicate_index_back: usize, + pub(crate) len: usize, + pub(crate) _ghost: PhantomData<*const (K, V)>, +} + +impl, InnerIter: Iterator + ExactSizeIterator> + Iter +{ + pub(crate) fn new(iter: InnerIter, len: usize) -> Self { + Iter { + iter, + duplicate: None, + duplicate_index: 0, + duplicate_back: None, + duplicate_index_back: 0, + len, + _ghost: PhantomData, + } + } +} + +impl, InnerIter: Iterator> Iterator + for Iter +{ + type Item = K; + + fn next(&mut self) -> Option { + if self.duplicate.is_none() { + self.duplicate = self.iter.next(); + } + if let Some((key, count)) = self.duplicate.as_ref() { + self.duplicate_index += 1; + let key = key.clone(); + if self.duplicate_index >= *count.borrow() { + self.duplicate = None; + self.duplicate_index = 0; + } + self.len -= 1; + Some(key) + } else { + if let Some((key, count)) = self.duplicate_back.as_ref() { + self.duplicate_index_back += 1; + let key = key.clone(); + if self.duplicate_index_back >= *count.borrow() { + self.duplicate_back = None; + } + self.len -= 1; + Some(key) + } else { + None + } + } + } + + fn count(self) -> usize { + self.len() + } + + fn fold(self, init: B, mut f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + let duplicate_index = self.duplicate_index; + let duplicate_index_back = self.duplicate_index_back; + self.duplicate + .map(move |(val, count)| (val, *count.borrow() - duplicate_index)) + .into_iter() + .chain(self.iter.map(move |(val, count)| (val, *count.borrow()))) + .chain( + self.duplicate_back + .map(move |(val, count)| (val, *count.borrow() - duplicate_index_back)) + .into_iter(), + ) + .fold(init, move |acc, (val, count)| { + (0..count).fold(acc, |acc, _| f(acc, val.clone())) + }) + } + + fn size_hint(&self) -> (usize, Option) { + let l = self.len(); + (l, Some(l)) + } +} + +impl, InnerIter: Iterator> ExactSizeIterator + for Iter +{ + fn len(&self) -> usize { + self.len + } +} + +impl, InnerIter: Iterator + DoubleEndedIterator> + DoubleEndedIterator for Iter +{ + fn next_back(&mut self) -> Option { + if self.duplicate_back.is_none() { + self.duplicate_back = self.iter.next_back(); + } + if let Some((key, count)) = self.duplicate_back.as_ref() { + self.duplicate_index_back += 1; + let key = key.clone(); + if self.duplicate_index_back >= *count.borrow() { + self.duplicate_back = None; + self.duplicate_index_back = 0; + } + self.len -= 1; + Some(key) + } else { + if let Some((key, count)) = self.duplicate.as_ref() { + self.duplicate_index += 1; + let key = key.clone(); + if self.duplicate_index >= *count.borrow() { + self.duplicate = None; + } + self.len -= 1; + Some(key) + } else { + None + } + } + } + + fn rfold(self, init: B, mut f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + let duplicate_index = self.duplicate_index; + let duplicate_index_back = self.duplicate_index_back; + self.duplicate_back + .map(move |(val, count)| (val, *count.borrow() - duplicate_index_back)) + .into_iter() + .chain( + self.iter + .rev() + .map(move |(val, count)| (val, *count.borrow())), + ) + .chain( + self.duplicate + .map(move |(val, count)| (val, *count.borrow() - duplicate_index)) + .into_iter(), + ) + .fold(init, move |acc, (val, count)| { + (0..count).fold(acc, |acc, _| f(acc, val.clone())) + }) + } +} + +impl, InnerIter: Iterator + std::iter::FusedIterator> + std::iter::FusedIterator for Iter +{ +} diff --git a/src/lib.rs b/src/lib.rs index 308c2bd..795e8f4 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,10 @@ //! permit duplicates. Consequently, they're useful for maintaining a //! count of distinct values. -mod multiset; +mod btree_multiset; +mod hash_multiset; +mod iter; -pub use multiset::{HashMultiSet, Iter}; +pub use btree_multiset::BTreeMultiSet; +pub use hash_multiset::HashMultiSet; +pub use iter::Iter; diff --git a/tests/specializations.rs b/tests/specializations.rs new file mode 100644 index 0000000..be6c009 --- /dev/null +++ b/tests/specializations.rs @@ -0,0 +1,96 @@ +#[macro_use] +extern crate quickcheck; + +use std::fmt::Debug; +use std::ops::BitXor; + +struct Unspecialized(I); +impl Iterator for Unspecialized +where + I: Iterator, +{ + type Item = I::Item; + + #[inline(always)] + fn next(&mut self) -> Option { + self.0.next() + } + + #[inline(always)] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } +} + +fn check_specialized<'a, V, IterItem, Iter, F>(iterator: &Iter, mapper: F) +where + V: Eq + Debug, + IterItem: 'a, + Iter: Iterator + Clone + 'a, + F: Fn(Box + 'a>) -> V, +{ + assert_eq!( + mapper(Box::new(Unspecialized(iterator.clone()))), + mapper(Box::new(iterator.clone())) + ) +} + +fn check_specialized_count_last_nth_sizeh<'a, IterItem, Iter>( + it: &Iter, + known_expected_size: Option, +) where + IterItem: 'a + Eq + Debug, + Iter: Iterator + Clone + 'a, +{ + let size = it.clone().count(); + if let Some(expected_size) = known_expected_size { + assert_eq!(size, expected_size); + } + check_specialized(it, |i| i.count()); + check_specialized(it, |i| i.last()); + for n in 0..size + 2 { + check_specialized(it, |mut i| i.nth(n)); + } + let mut it_sh = it.clone(); + for n in 0..size + 2 { + let len = it_sh.clone().count(); + let (min, max) = it_sh.size_hint(); + assert_eq!((size - n.min(size)), len); + assert!(min <= len); + if let Some(max) = max { + assert!(len <= max); + } + it_sh.next(); + } +} + +fn check_specialized_fold_xor<'a, IterItem, Iter>(it: &Iter) +where + IterItem: 'a + + BitXor + + Eq + + Debug + + BitXor<::Output, Output = ::Output> + + Clone, + ::Output: + BitXor::Output> + Eq + Debug + Clone, + Iter: Iterator + Clone + 'a, +{ + check_specialized(it, |mut i| { + let first = i.next().map(|f| f.clone() ^ (f.clone() ^ f)); + i.fold(first, |acc, v: IterItem| acc.map(move |a| v ^ a)) + }); +} + +fn hms_test(test_vec: Vec, known_expected_size: Option) { + let hms: multiset::HashMultiSet<_> = test_vec.into_iter().collect(); + let iter = hms.iter(); + check_specialized_count_last_nth_sizeh(&iter, known_expected_size.map(|x| x + 1)); + check_specialized_fold_xor(&iter) +} + +quickcheck! { + fn hms_test_qc(test_vec: Vec) -> () { + hms_test(test_vec, None) + } +}