From 567aadbe1c6882b13c9596b31622a80817426c1b Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 30 Nov 2019 23:05:40 +0100 Subject: [PATCH 1/9] Implement BTreeMultiset --- Cargo.toml | 2 +- src/btree_multiset.rs | 518 ++++++++++++++++++++++++++ src/{multiset.rs => hash_multiset.rs} | 47 +-- src/iter.rs | 60 +++ src/lib.rs | 8 +- 5 files changed, 590 insertions(+), 45 deletions(-) create mode 100644 src/btree_multiset.rs rename src/{multiset.rs => hash_multiset.rs} (92%) create mode 100644 src/iter.rs diff --git a/Cargo.toml b/Cargo.toml index 2e370ce..bb1c5a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "multiset" -version = "0.0.5" +version = "0.1.0" repository = "https://github.com/jmitchell/multiset" description = "Multisets/bags" keywords = ["multiset","bag","data-structure","collection","count"] diff --git a/src/btree_multiset.rs b/src/btree_multiset.rs new file mode 100644 index 0000000..ac32f8f --- /dev/null +++ b/src/btree_multiset.rs @@ -0,0 +1,518 @@ +// Copyright 2019 multiset developers +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +#![warn(missing_docs)] + +use super::Iter; + +use std::borrow::Borrow; +use std::collections::btree_map::{self, Entry, Keys}; +use std::collections::BTreeMap; +use std::fmt; +use std::iter::{FromIterator, IntoIterator}; +use std::ops::{Add, Sub}; + +#[derive(Clone)] +/// A tree-based multiset. +pub struct BTreeMultiSet { + elem_counts: BTreeMap, + size: usize, +} + +impl BTreeMultiSet +where + K: Ord, +{ + /// Creates a new empty `BTreeMultiSet`. + /// + /// # Examples + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let multiset: BTreeMultiSet = BTreeMultiSet::new(); + /// ``` + pub fn new() -> Self { + BTreeMultiSet { + elem_counts: BTreeMap::new(), + size: 0, + } + } + + /// An iterator visiting all elements in arbitrary order, including each duplicate. + /// The iterator element type is `&'a K`. + /// + /// # Examples + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// let mut multiset = BTreeMultiSet::new(); + /// multiset.insert(0); + /// multiset.insert(0); + /// multiset.insert(1); + /// + /// // Will print in an arbitrary order. + /// for x in multiset.iter() { + /// println!("{}", x); + /// } + /// assert_eq!(3, multiset.iter().count()); + /// ``` + pub fn iter(&self) -> Iter<&K, &usize, btree_map::Iter> { + Iter { + iter: self.elem_counts.iter(), + duplicate: None, + duplicate_index: 0, + _ghost: std::marker::PhantomData, + } + } + + /// Returns true if the multiset contains no elements. + /// + /// # Examples + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let mut multiset = BTreeMultiSet::new(); + /// assert!(multiset.is_empty()); + /// multiset.insert(1); + /// assert!(!multiset.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.elem_counts.is_empty() + } + + /// Returns `true` if the multiset contains a value. + /// + /// # Examples + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let set: BTreeMultiSet<_> = [1, 2, 3].iter().cloned().collect(); + /// assert_eq!(set.contains(&1), true); + /// assert_eq!(set.contains(&4), false); + /// ``` + pub fn contains(&self, value: &Q) -> bool + where + K: Borrow, + Q: Ord, + { + self.elem_counts.contains_key(value) + } + + /// Counts all the elements, including each duplicate. + /// + /// # Examples + /// + /// A new empty `BTreeMultiSet` with 0 total elements: + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let multiset: BTreeMultiSet = BTreeMultiSet::new(); + /// assert_eq!(0, multiset.len()); + /// ``` + /// + /// A `BTreeMultiSet` from `vec![1,1,2]` has 3 total elements: + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// use std::iter::FromIterator; + /// + /// let multiset: BTreeMultiSet = FromIterator::from_iter(vec![1,1,2]); + /// assert_eq!(3, multiset.len()); + /// ``` + pub fn len(&self) -> usize { + self.size + } + + /// Returns all the distinct elements in the `BTreeMultiSet`. + /// + /// # Examples + /// + /// A `BTreeMultiSet` from `vec![1,1,2]` has 2 distinct elements, + /// namely `1` and `2`, but not `3`: + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// use std::collections::HashSet; + /// use std::iter::FromIterator; + /// + /// let multiset: BTreeMultiSet = FromIterator::from_iter(vec![1,1,2]); + /// let distinct = multiset.distinct_elements().collect::>(); + /// assert_eq!(2, distinct.len()); + /// assert!(distinct.contains(&1)); + /// assert!(distinct.contains(&2)); + /// assert!(!distinct.contains(&3)); + /// ``` + pub fn distinct_elements<'a>(&'a self) -> Keys<'a, K, usize> { + self.elem_counts.keys() + } + + /// Inserts an element. + /// + /// # Examples + /// + /// Insert `5` into a new `BTreeMultiSet`: + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); + /// assert_eq!(0, multiset.count_of(&5)); + /// multiset.insert(5); + /// assert_eq!(1, multiset.count_of(&5)); + /// ``` + pub fn insert(&mut self, val: K) { + self.insert_times(val, 1); + } + + /// Inserts an element `n` times. + /// + /// # Examples + /// + /// Insert three `5`s into a new `BTreeMultiSet`: + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); + /// assert_eq!(0, multiset.count_of(&5)); + /// multiset.insert_times(5,3); + /// assert_eq!(3, multiset.count_of(&5)); + /// ``` + pub fn insert_times(&mut self, val: K, n: usize) { + self.size += n; + match self.elem_counts.entry(val) { + Entry::Vacant(view) => { + view.insert(n); + } + Entry::Occupied(mut view) => { + let v = view.get_mut(); + *v += n; + } + } + } + + /// Remove an element. Removal of a nonexistent element + /// has no effect. + /// + /// # Examples + /// + /// Remove `5` from a new `BTreeMultiSet`: + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); + /// multiset.insert(5); + /// assert_eq!(1, multiset.count_of(&5)); + /// assert!(multiset.remove(&5)); + /// assert_eq!(0, multiset.count_of(&5)); + /// assert!(!multiset.remove(&5)); + /// ``` + pub fn remove(&mut self, val: &K) -> bool { + self.remove_times(val, 1) > 0 + } + + /// Remove an element `n` times. If an element is + /// removed as many or more times than it appears, + /// it is entirely removed from the multiset. + /// + /// # Examples + /// + /// Remove `5`s from a `BTreeMultiSet` containing 3 of them. + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); + /// multiset.insert_times(5, 3); + /// assert!(multiset.count_of(&5) == 3); + /// assert!(multiset.remove_times(&5, 2) == 2); + /// assert!(multiset.len() == 1); + /// assert!(multiset.count_of(&5) == 1); + /// assert!(multiset.remove_times(&5, 1) == 1); + /// assert!(multiset.len() == 0); + /// assert!(multiset.count_of(&5) == 0); + /// assert!(multiset.remove_times(&5, 1) == 0); + /// assert!(multiset.count_of(&5) == 0); + /// ``` + pub fn remove_times(&mut self, val: &K, times: usize) -> usize { + { + let entry = self.elem_counts.get_mut(val); + if entry.is_some() { + let count = entry.unwrap(); + if *count > times { + *count -= times; + self.size -= times; + return times; + } + self.size -= *count; + } + } + self.elem_counts.remove(val).unwrap_or(0) + } + + /// Remove all of an element from the multiset. + /// + /// # Examples + /// + /// Remove all `5`s from a `BTreeMultiSet` containing 3 of them. + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); + /// multiset.insert_times(5,3); + /// assert!(multiset.count_of(&5) == 3); + /// multiset.remove_all(&5); + /// assert!(multiset.count_of(&5) == 0); + /// assert!(multiset.len() == 0); + /// ``` + pub fn remove_all(&mut self, val: &K) { + self.size -= self.elem_counts.get(val).unwrap_or(&0); + self.elem_counts.remove(val); + } + + /// Counts the occurrences of `val`. + /// + /// # Examples + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// + /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); + /// multiset.insert(0); + /// multiset.insert(0); + /// multiset.insert(1); + /// multiset.insert(0); + /// assert_eq!(3, multiset.count_of(&0)); + /// assert_eq!(1, multiset.count_of(&1)); + /// ``` + pub fn count_of(&self, val: &K) -> usize { + self.elem_counts.get(val).map_or(0, |x| *x) + } +} + +impl Add for BTreeMultiSet +where + T: Ord + Clone, +{ + type Output = BTreeMultiSet; + + /// Combine two `BTreeMultiSet`s by adding the number of each + /// distinct element. + /// + /// # Examples + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// use std::iter::FromIterator; + /// + /// let lhs: BTreeMultiSet = FromIterator::from_iter(vec![1,2,3]); + /// let rhs: BTreeMultiSet = FromIterator::from_iter(vec![1,1,4]); + /// let combined = lhs + rhs; + /// assert_eq!(3, combined.count_of(&1)); + /// assert_eq!(1, combined.count_of(&2)); + /// assert_eq!(1, combined.count_of(&3)); + /// assert_eq!(1, combined.count_of(&4)); + /// assert_eq!(0, combined.count_of(&5)); + /// ``` + fn add(self, rhs: BTreeMultiSet) -> BTreeMultiSet { + let mut ret: BTreeMultiSet = BTreeMultiSet::new(); + for val in self.distinct_elements() { + let count = self.count_of(val); + ret.insert_times((*val).clone(), count); + } + for val in rhs.distinct_elements() { + let count = rhs.count_of(val); + ret.insert_times((*val).clone(), count); + } + ret + } +} + +impl Sub for BTreeMultiSet +where + T: Ord + Clone, +{ + type Output = BTreeMultiSet; + + /// Combine two `BTreeMultiSet`s by removing elements + /// in the second multiset from the first. As with `remove()` + /// (and set difference), excess elements in the second + /// multiset are ignored. + /// + /// # Examples + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// use std::iter::FromIterator; + /// + /// let lhs: BTreeMultiSet = FromIterator::from_iter(vec![1,2,3]); + /// let rhs: BTreeMultiSet = FromIterator::from_iter(vec![1,1,4]); + /// let combined = lhs - rhs; + /// assert_eq!(0, combined.count_of(&1)); + /// assert_eq!(1, combined.count_of(&2)); + /// assert_eq!(1, combined.count_of(&3)); + /// assert_eq!(0, combined.count_of(&4)); + /// ``` + fn sub(self, rhs: BTreeMultiSet) -> BTreeMultiSet { + let mut ret = self.clone(); + for val in rhs.distinct_elements() { + let count = rhs.count_of(val); + ret.remove_times(val, count); + } + ret + } +} + +impl FromIterator for BTreeMultiSet +where + A: Ord, +{ + /// Creates a new `BTreeMultiSet` from the elements in an iterable. + /// + /// # Examples + /// + /// Count occurrences of each `char` in `"hello world"`: + /// + /// ``` + /// use multiset::BTreeMultiSet; + /// use std::iter::FromIterator; + /// + /// let vals = vec!['h','e','l','l','o',' ','w','o','r','l','d']; + /// let multiset: BTreeMultiSet = FromIterator::from_iter(vals); + /// assert_eq!(1, multiset.count_of(&'h')); + /// assert_eq!(3, multiset.count_of(&'l')); + /// assert_eq!(0, multiset.count_of(&'z')); + /// ``` + fn from_iter(iterable: T) -> BTreeMultiSet + where + T: IntoIterator, + { + let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); + for elem in iterable.into_iter() { + multiset.insert(elem); + } + multiset + } +} + +impl PartialEq for BTreeMultiSet +where + T: Ord, +{ + fn eq(&self, other: &BTreeMultiSet) -> bool { + if self.len() != other.len() { + return false; + } + + self.elem_counts + .iter() + .all(|(key, count)| other.contains(key) && other.elem_counts.get(key).unwrap() == count) + } +} + +impl Eq for BTreeMultiSet where T: Ord {} + +impl fmt::Debug for BTreeMultiSet +where + T: Ord + fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_set().entries(self.iter()).finish() + } +} + +#[cfg(test)] +mod test_multiset { + use super::BTreeMultiSet; + + #[test] + fn test_iterate() { + let mut a = BTreeMultiSet::new(); + for i in 0..16 { + a.insert(i); + } + for i in 0..8 { + a.insert(i); + } + for i in 0..4 { + a.insert(i); + } + let mut observed: u16 = 0; + let mut observed_twice: u16 = 0; + let mut observed_thrice: u16 = 0; + for k in a.iter() { + let bit = 1 << *k; + if observed & bit == 0 { + observed |= bit; + } else if observed_twice & bit == 0 { + observed_twice |= bit; + } else if observed_thrice & bit == 0 { + observed_thrice |= bit; + } + } + assert_eq!(observed, 0xFFFF); + assert_eq!(observed_twice, 0xFF); + assert_eq!(observed_thrice, 0xF); + } + + #[test] + fn test_eq() { + let mut s1 = BTreeMultiSet::new(); + s1.insert(0); + s1.insert(1); + s1.insert(1); + let mut s2 = BTreeMultiSet::new(); + s2.insert(0); + s2.insert(1); + assert!(s1 != s2); + s2.insert(1); + assert_eq!(s1, s2); + } + + #[test] + fn test_size() { + let mut set = BTreeMultiSet::new(); + + assert_eq!(set.len(), 0); + set.insert('a'); + assert_eq!(set.len(), 1); + set.remove(&'a'); + assert_eq!(set.len(), 0); + + set.insert_times('b', 4); + assert_eq!(set.len(), 4); + set.insert('b'); + assert_eq!(set.len(), 5); + set.remove_all(&'b'); + assert_eq!(set.len(), 0); + + set.insert_times('c', 6); + assert_eq!(set.len(), 6); + set.insert_times('c', 3); + assert_eq!(set.len(), 9); + set.insert('c'); + assert_eq!(set.len(), 10); + set.insert('d'); + assert_eq!(set.len(), 11); + set.insert_times('d', 3); + assert_eq!(set.len(), 14); + set.remove_all(&'c'); + assert_eq!(set.len(), 4); + set.remove(&'d'); + assert_eq!(set.len(), 3); + set.remove_times(&'d', 2); + assert_eq!(set.len(), 1); + set.remove(&'d'); + assert_eq!(set.len(), 0); + } +} diff --git a/src/multiset.rs b/src/hash_multiset.rs similarity index 92% rename from src/multiset.rs rename to src/hash_multiset.rs index dacaec8..b9fe8b1 100644 --- a/src/multiset.rs +++ b/src/hash_multiset.rs @@ -7,9 +7,10 @@ // except according to those terms. #![warn(missing_docs)] +use super::Iter; + use std::borrow::Borrow; -use std::collections::hash_map; -use std::collections::hash_map::{Entry, Keys}; +use std::collections::hash_map::{self, Entry, Keys}; use std::collections::HashMap; use std::fmt; use std::hash::Hash; @@ -23,45 +24,6 @@ pub struct HashMultiSet { size: usize, } -/// An iterator over the items of a `HashMultiSet`. -/// -/// This `struct` is created by the [`iter`] method on [`HashMultiSet`]. -pub struct Iter<'a, K: 'a> { - iter: hash_map::Iter<'a, K, usize>, - duplicate: Option<(&'a K, &'a usize)>, - duplicate_index: usize, -} - -impl<'a, K> Clone for Iter<'a, K> { - fn clone(&self) -> Iter<'a, K> { - Iter { - iter: self.iter.clone(), - duplicate: self.duplicate.clone(), - duplicate_index: self.duplicate_index, - } - } -} -impl<'a, K> Iterator for Iter<'a, K> { - type Item = &'a K; - - fn next(&mut self) -> Option<&'a K> { - if self.duplicate.is_none() { - self.duplicate = self.iter.next(); - } - if self.duplicate.is_some() { - let (key, count) = self.duplicate.unwrap(); - self.duplicate_index += 1; - if &self.duplicate_index >= count { - self.duplicate = None; - self.duplicate_index = 0; - } - Some(key) - } else { - None - } - } -} - impl HashMultiSet where K: Eq + Hash, @@ -100,11 +62,12 @@ where /// } /// assert_eq!(3, multiset.iter().count()); /// ``` - pub fn iter(&self) -> Iter { + pub fn iter(&self) -> Iter<&K, &usize, hash_map::Iter> { Iter { iter: self.elem_counts.iter(), duplicate: None, duplicate_index: 0, + _ghost: std::marker::PhantomData, } } diff --git a/src/iter.rs b/src/iter.rs new file mode 100644 index 0000000..6c58186 --- /dev/null +++ b/src/iter.rs @@ -0,0 +1,60 @@ +// Copyright 2019 multiset developers +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +#![warn(missing_docs)] + +use std::borrow::Borrow; +use std::marker::PhantomData; + +/// An iterator over the items of a `MultiSet`. +/// +/// This `struct` is created by the [`iter`](super::HashMultiSet::iter) method on +/// [`HashMultiSet`](super::HashMultiSet) or [`BTreeMultiSet`](super::BTreeMultiSet). +pub struct Iter, InnerIter: Iterator> { + pub(crate) iter: InnerIter, + pub(crate) duplicate: Option<::Item>, + pub(crate) duplicate_index: usize, + pub(crate) _ghost: PhantomData<*const (K, V)>, +} + +impl, InnerIter: Iterator + Clone> Clone + for Iter +where + ::Item: Clone, +{ + fn clone(&self) -> Iter { + Iter { + iter: self.iter.clone(), + duplicate: self.duplicate.clone(), + duplicate_index: self.duplicate_index, + _ghost: PhantomData, + } + } +} + +impl, InnerIter: Iterator> Iterator + for Iter +{ + type Item = K; + + fn next(&mut self) -> Option { + if self.duplicate.is_none() { + self.duplicate = self.iter.next(); + } + if let Some((key, count)) = self.duplicate.as_ref() { + self.duplicate_index += 1; + let key = key.clone(); + if &self.duplicate_index >= count.borrow() { + self.duplicate = None; + self.duplicate_index = 0; + } + Some(key) + } else { + None + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 308c2bd..795e8f4 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,10 @@ //! permit duplicates. Consequently, they're useful for maintaining a //! count of distinct values. -mod multiset; +mod btree_multiset; +mod hash_multiset; +mod iter; -pub use multiset::{HashMultiSet, Iter}; +pub use btree_multiset::BTreeMultiSet; +pub use hash_multiset::HashMultiSet; +pub use iter::Iter; From e143c37da37b6ab03306924da5ed34ce289a0799 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 30 Nov 2019 23:34:07 +0100 Subject: [PATCH 2/9] Specialize iterator for performance Only nth could be optimized further, feel free to implement it :) --- src/btree_multiset.rs | 1 + src/hash_multiset.rs | 1 + src/iter.rs | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/src/btree_multiset.rs b/src/btree_multiset.rs index ac32f8f..9e6bd8f 100644 --- a/src/btree_multiset.rs +++ b/src/btree_multiset.rs @@ -66,6 +66,7 @@ where iter: self.elem_counts.iter(), duplicate: None, duplicate_index: 0, + len: self.size, _ghost: std::marker::PhantomData, } } diff --git a/src/hash_multiset.rs b/src/hash_multiset.rs index b9fe8b1..d7fb673 100644 --- a/src/hash_multiset.rs +++ b/src/hash_multiset.rs @@ -67,6 +67,7 @@ where iter: self.elem_counts.iter(), duplicate: None, duplicate_index: 0, + len: self.size, _ghost: std::marker::PhantomData, } } diff --git a/src/iter.rs b/src/iter.rs index 6c58186..760a691 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -18,6 +18,7 @@ pub struct Iter, InnerIter: Iterator> pub(crate) iter: InnerIter, pub(crate) duplicate: Option<::Item>, pub(crate) duplicate_index: usize, + pub(crate) len: usize, pub(crate) _ghost: PhantomData<*const (K, V)>, } @@ -31,6 +32,7 @@ where iter: self.iter.clone(), duplicate: self.duplicate.clone(), duplicate_index: self.duplicate_index, + len: self.len, _ghost: PhantomData, } } @@ -52,9 +54,41 @@ impl, InnerIter: Iterator> Iterator self.duplicate = None; self.duplicate_index = 0; } + self.len -= 1; Some(key) } else { None } } + + fn count(self) -> usize { + self.len() + } + + fn fold(self, init: B, mut f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + let duplicate_index = self.duplicate_index; + self.duplicate + .map(move |(val, count)| (val, *count.borrow() - duplicate_index)) + .into_iter() + .chain(self.iter.map(move |(val, count)| (val, *count.borrow()))) + .fold(init, move |acc, (val, count)| { + (0..count).fold(acc, |acc, _| f(acc, val.clone())) + }) + } + + fn size_hint(&self) -> (usize, Option) { + let l = self.len(); + (l, Some(l)) + } +} + +impl, InnerIter: Iterator> ExactSizeIterator + for Iter +{ + fn len(&self) -> usize { + self.len + } } From 8b3d25f517157b02b73ede06c7f8ee63835a5047 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sun, 1 Dec 2019 00:31:51 +0100 Subject: [PATCH 3/9] Implement DoubleEndedIterator for Iter --- src/btree_multiset.rs | 8 +--- src/hash_multiset.rs | 8 +--- src/iter.rs | 96 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 96 insertions(+), 16 deletions(-) diff --git a/src/btree_multiset.rs b/src/btree_multiset.rs index 9e6bd8f..3bb1ea2 100644 --- a/src/btree_multiset.rs +++ b/src/btree_multiset.rs @@ -62,13 +62,7 @@ where /// assert_eq!(3, multiset.iter().count()); /// ``` pub fn iter(&self) -> Iter<&K, &usize, btree_map::Iter> { - Iter { - iter: self.elem_counts.iter(), - duplicate: None, - duplicate_index: 0, - len: self.size, - _ghost: std::marker::PhantomData, - } + Iter::new(self.elem_counts.iter(), self.size) } /// Returns true if the multiset contains no elements. diff --git a/src/hash_multiset.rs b/src/hash_multiset.rs index d7fb673..e439def 100644 --- a/src/hash_multiset.rs +++ b/src/hash_multiset.rs @@ -63,13 +63,7 @@ where /// assert_eq!(3, multiset.iter().count()); /// ``` pub fn iter(&self) -> Iter<&K, &usize, hash_map::Iter> { - Iter { - iter: self.elem_counts.iter(), - duplicate: None, - duplicate_index: 0, - len: self.size, - _ghost: std::marker::PhantomData, - } + Iter::new(self.elem_counts.iter(), self.size) } /// Returns true if the multiset contains no elements. diff --git a/src/iter.rs b/src/iter.rs index 760a691..5b3909b 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -18,10 +18,28 @@ pub struct Iter, InnerIter: Iterator> pub(crate) iter: InnerIter, pub(crate) duplicate: Option<::Item>, pub(crate) duplicate_index: usize, + pub(crate) duplicate_back: Option<::Item>, + pub(crate) duplicate_index_back: usize, pub(crate) len: usize, pub(crate) _ghost: PhantomData<*const (K, V)>, } +impl, InnerIter: Iterator + ExactSizeIterator> + Iter +{ + pub(crate) fn new(iter: InnerIter, len: usize) -> Self { + Iter { + iter, + duplicate: None, + duplicate_index: 0, + duplicate_back: None, + duplicate_index_back: 0, + len, + _ghost: PhantomData, + } + } +} + impl, InnerIter: Iterator + Clone> Clone for Iter where @@ -32,6 +50,8 @@ where iter: self.iter.clone(), duplicate: self.duplicate.clone(), duplicate_index: self.duplicate_index, + duplicate_back: self.duplicate_back.clone(), + duplicate_index_back: self.duplicate_index_back, len: self.len, _ghost: PhantomData, } @@ -50,14 +70,24 @@ impl, InnerIter: Iterator> Iterator if let Some((key, count)) = self.duplicate.as_ref() { self.duplicate_index += 1; let key = key.clone(); - if &self.duplicate_index >= count.borrow() { + if self.duplicate_index >= *count.borrow() { self.duplicate = None; self.duplicate_index = 0; } self.len -= 1; Some(key) } else { - None + if let Some((key, count)) = self.duplicate_back.as_ref() { + self.duplicate_index_back += 1; + let key = key.clone(); + if self.duplicate_index_back >= *count.borrow() { + self.duplicate_back = None; + } + self.len -= 1; + Some(key) + } else { + None + } } } @@ -70,10 +100,16 @@ impl, InnerIter: Iterator> Iterator F: FnMut(B, Self::Item) -> B, { let duplicate_index = self.duplicate_index; + let duplicate_index_back = self.duplicate_index_back; self.duplicate .map(move |(val, count)| (val, *count.borrow() - duplicate_index)) .into_iter() .chain(self.iter.map(move |(val, count)| (val, *count.borrow()))) + .chain( + self.duplicate_back + .map(move |(val, count)| (val, *count.borrow() - duplicate_index_back)) + .into_iter(), + ) .fold(init, move |acc, (val, count)| { (0..count).fold(acc, |acc, _| f(acc, val.clone())) }) @@ -92,3 +128,59 @@ impl, InnerIter: Iterator> ExactSizeIt self.len } } + +impl, InnerIter: Iterator + DoubleEndedIterator> + DoubleEndedIterator for Iter +{ + fn next_back(&mut self) -> Option { + if self.duplicate_back.is_none() { + self.duplicate_back = self.iter.next_back(); + } + if let Some((key, count)) = self.duplicate_back.as_ref() { + self.duplicate_index_back += 1; + let key = key.clone(); + if self.duplicate_index_back >= *count.borrow() { + self.duplicate_back = None; + self.duplicate_index_back = 0; + } + self.len -= 1; + Some(key) + } else { + if let Some((key, count)) = self.duplicate.as_ref() { + self.duplicate_index += 1; + let key = key.clone(); + if self.duplicate_index >= *count.borrow() { + self.duplicate = None; + } + self.len -= 1; + Some(key) + } else { + None + } + } + } + + fn rfold(self, init: B, mut f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + let duplicate_index = self.duplicate_index; + let duplicate_index_back = self.duplicate_index_back; + self.duplicate_back + .map(move |(val, count)| (val, *count.borrow() - duplicate_index_back)) + .into_iter() + .chain( + self.iter + .rev() + .map(move |(val, count)| (val, *count.borrow())), + ) + .chain( + self.duplicate + .map(move |(val, count)| (val, *count.borrow() - duplicate_index)) + .into_iter(), + ) + .fold(init, move |acc, (val, count)| { + (0..count).fold(acc, |acc, _| f(acc, val.clone())) + }) + } +} From c647e0416dee736d40be8d37bff8adaa64b590de Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sun, 1 Dec 2019 01:38:21 +0100 Subject: [PATCH 4/9] Impl FusedIterator for Iter --- src/iter.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/iter.rs b/src/iter.rs index 5b3909b..fa153e0 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -184,3 +184,8 @@ impl, InnerIter: Iterator + DoubleEnde }) } } + +impl, InnerIter: Iterator + std::iter::FusedIterator> + std::iter::FusedIterator for Iter +{ +} From 518ca328fa2f69b1ad5d083d46dce822edbb2ac6 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 4 Jan 2020 13:47:21 +0100 Subject: [PATCH 5/9] Implement BTreeMultiset through a build script instead of having duplicate code --- .gitignore | 1 + build.rs | 14 ++ src/btree_multiset.rs | 513 ------------------------------------------ src/hash_multiset.rs | 6 +- 4 files changed, 16 insertions(+), 518 deletions(-) create mode 100644 build.rs delete mode 100644 src/btree_multiset.rs diff --git a/.gitignore b/.gitignore index a9d37c5..b53cdd6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target Cargo.lock +/src/btree_multiset.rs diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..b375ec4 --- /dev/null +++ b/build.rs @@ -0,0 +1,14 @@ +fn main() { + let btree_multiset_code = std::fs::read_to_string("./src/hash_multiset.rs") + .expect("Could not open hash_multiset source file") + .replace("Hash + Eq", "Ord") + .replace("Eq + Hash", "Ord") + .replace("hash_map::", "btree_map::") + .replace("HashMap", "BTreeMap") + .replace("HashMultiSet", "BTreeMultiSet") + .replace("use std::hash::Hash;\n", "") + .replace("hash-based multiset", "tree-based multiset"); + std::fs::write("./src/btree_multiset.rs", btree_multiset_code.as_bytes()) + .expect("Could not write btree_multiset file"); + println!("cargo:rerun-if-changed=./src/hash_multiset.rs"); +} diff --git a/src/btree_multiset.rs b/src/btree_multiset.rs deleted file mode 100644 index 3bb1ea2..0000000 --- a/src/btree_multiset.rs +++ /dev/null @@ -1,513 +0,0 @@ -// Copyright 2019 multiset developers -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. -#![warn(missing_docs)] - -use super::Iter; - -use std::borrow::Borrow; -use std::collections::btree_map::{self, Entry, Keys}; -use std::collections::BTreeMap; -use std::fmt; -use std::iter::{FromIterator, IntoIterator}; -use std::ops::{Add, Sub}; - -#[derive(Clone)] -/// A tree-based multiset. -pub struct BTreeMultiSet { - elem_counts: BTreeMap, - size: usize, -} - -impl BTreeMultiSet -where - K: Ord, -{ - /// Creates a new empty `BTreeMultiSet`. - /// - /// # Examples - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let multiset: BTreeMultiSet = BTreeMultiSet::new(); - /// ``` - pub fn new() -> Self { - BTreeMultiSet { - elem_counts: BTreeMap::new(), - size: 0, - } - } - - /// An iterator visiting all elements in arbitrary order, including each duplicate. - /// The iterator element type is `&'a K`. - /// - /// # Examples - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// let mut multiset = BTreeMultiSet::new(); - /// multiset.insert(0); - /// multiset.insert(0); - /// multiset.insert(1); - /// - /// // Will print in an arbitrary order. - /// for x in multiset.iter() { - /// println!("{}", x); - /// } - /// assert_eq!(3, multiset.iter().count()); - /// ``` - pub fn iter(&self) -> Iter<&K, &usize, btree_map::Iter> { - Iter::new(self.elem_counts.iter(), self.size) - } - - /// Returns true if the multiset contains no elements. - /// - /// # Examples - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let mut multiset = BTreeMultiSet::new(); - /// assert!(multiset.is_empty()); - /// multiset.insert(1); - /// assert!(!multiset.is_empty()); - /// ``` - pub fn is_empty(&self) -> bool { - self.elem_counts.is_empty() - } - - /// Returns `true` if the multiset contains a value. - /// - /// # Examples - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let set: BTreeMultiSet<_> = [1, 2, 3].iter().cloned().collect(); - /// assert_eq!(set.contains(&1), true); - /// assert_eq!(set.contains(&4), false); - /// ``` - pub fn contains(&self, value: &Q) -> bool - where - K: Borrow, - Q: Ord, - { - self.elem_counts.contains_key(value) - } - - /// Counts all the elements, including each duplicate. - /// - /// # Examples - /// - /// A new empty `BTreeMultiSet` with 0 total elements: - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let multiset: BTreeMultiSet = BTreeMultiSet::new(); - /// assert_eq!(0, multiset.len()); - /// ``` - /// - /// A `BTreeMultiSet` from `vec![1,1,2]` has 3 total elements: - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// use std::iter::FromIterator; - /// - /// let multiset: BTreeMultiSet = FromIterator::from_iter(vec![1,1,2]); - /// assert_eq!(3, multiset.len()); - /// ``` - pub fn len(&self) -> usize { - self.size - } - - /// Returns all the distinct elements in the `BTreeMultiSet`. - /// - /// # Examples - /// - /// A `BTreeMultiSet` from `vec![1,1,2]` has 2 distinct elements, - /// namely `1` and `2`, but not `3`: - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// use std::collections::HashSet; - /// use std::iter::FromIterator; - /// - /// let multiset: BTreeMultiSet = FromIterator::from_iter(vec![1,1,2]); - /// let distinct = multiset.distinct_elements().collect::>(); - /// assert_eq!(2, distinct.len()); - /// assert!(distinct.contains(&1)); - /// assert!(distinct.contains(&2)); - /// assert!(!distinct.contains(&3)); - /// ``` - pub fn distinct_elements<'a>(&'a self) -> Keys<'a, K, usize> { - self.elem_counts.keys() - } - - /// Inserts an element. - /// - /// # Examples - /// - /// Insert `5` into a new `BTreeMultiSet`: - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); - /// assert_eq!(0, multiset.count_of(&5)); - /// multiset.insert(5); - /// assert_eq!(1, multiset.count_of(&5)); - /// ``` - pub fn insert(&mut self, val: K) { - self.insert_times(val, 1); - } - - /// Inserts an element `n` times. - /// - /// # Examples - /// - /// Insert three `5`s into a new `BTreeMultiSet`: - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); - /// assert_eq!(0, multiset.count_of(&5)); - /// multiset.insert_times(5,3); - /// assert_eq!(3, multiset.count_of(&5)); - /// ``` - pub fn insert_times(&mut self, val: K, n: usize) { - self.size += n; - match self.elem_counts.entry(val) { - Entry::Vacant(view) => { - view.insert(n); - } - Entry::Occupied(mut view) => { - let v = view.get_mut(); - *v += n; - } - } - } - - /// Remove an element. Removal of a nonexistent element - /// has no effect. - /// - /// # Examples - /// - /// Remove `5` from a new `BTreeMultiSet`: - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); - /// multiset.insert(5); - /// assert_eq!(1, multiset.count_of(&5)); - /// assert!(multiset.remove(&5)); - /// assert_eq!(0, multiset.count_of(&5)); - /// assert!(!multiset.remove(&5)); - /// ``` - pub fn remove(&mut self, val: &K) -> bool { - self.remove_times(val, 1) > 0 - } - - /// Remove an element `n` times. If an element is - /// removed as many or more times than it appears, - /// it is entirely removed from the multiset. - /// - /// # Examples - /// - /// Remove `5`s from a `BTreeMultiSet` containing 3 of them. - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); - /// multiset.insert_times(5, 3); - /// assert!(multiset.count_of(&5) == 3); - /// assert!(multiset.remove_times(&5, 2) == 2); - /// assert!(multiset.len() == 1); - /// assert!(multiset.count_of(&5) == 1); - /// assert!(multiset.remove_times(&5, 1) == 1); - /// assert!(multiset.len() == 0); - /// assert!(multiset.count_of(&5) == 0); - /// assert!(multiset.remove_times(&5, 1) == 0); - /// assert!(multiset.count_of(&5) == 0); - /// ``` - pub fn remove_times(&mut self, val: &K, times: usize) -> usize { - { - let entry = self.elem_counts.get_mut(val); - if entry.is_some() { - let count = entry.unwrap(); - if *count > times { - *count -= times; - self.size -= times; - return times; - } - self.size -= *count; - } - } - self.elem_counts.remove(val).unwrap_or(0) - } - - /// Remove all of an element from the multiset. - /// - /// # Examples - /// - /// Remove all `5`s from a `BTreeMultiSet` containing 3 of them. - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); - /// multiset.insert_times(5,3); - /// assert!(multiset.count_of(&5) == 3); - /// multiset.remove_all(&5); - /// assert!(multiset.count_of(&5) == 0); - /// assert!(multiset.len() == 0); - /// ``` - pub fn remove_all(&mut self, val: &K) { - self.size -= self.elem_counts.get(val).unwrap_or(&0); - self.elem_counts.remove(val); - } - - /// Counts the occurrences of `val`. - /// - /// # Examples - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// - /// let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); - /// multiset.insert(0); - /// multiset.insert(0); - /// multiset.insert(1); - /// multiset.insert(0); - /// assert_eq!(3, multiset.count_of(&0)); - /// assert_eq!(1, multiset.count_of(&1)); - /// ``` - pub fn count_of(&self, val: &K) -> usize { - self.elem_counts.get(val).map_or(0, |x| *x) - } -} - -impl Add for BTreeMultiSet -where - T: Ord + Clone, -{ - type Output = BTreeMultiSet; - - /// Combine two `BTreeMultiSet`s by adding the number of each - /// distinct element. - /// - /// # Examples - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// use std::iter::FromIterator; - /// - /// let lhs: BTreeMultiSet = FromIterator::from_iter(vec![1,2,3]); - /// let rhs: BTreeMultiSet = FromIterator::from_iter(vec![1,1,4]); - /// let combined = lhs + rhs; - /// assert_eq!(3, combined.count_of(&1)); - /// assert_eq!(1, combined.count_of(&2)); - /// assert_eq!(1, combined.count_of(&3)); - /// assert_eq!(1, combined.count_of(&4)); - /// assert_eq!(0, combined.count_of(&5)); - /// ``` - fn add(self, rhs: BTreeMultiSet) -> BTreeMultiSet { - let mut ret: BTreeMultiSet = BTreeMultiSet::new(); - for val in self.distinct_elements() { - let count = self.count_of(val); - ret.insert_times((*val).clone(), count); - } - for val in rhs.distinct_elements() { - let count = rhs.count_of(val); - ret.insert_times((*val).clone(), count); - } - ret - } -} - -impl Sub for BTreeMultiSet -where - T: Ord + Clone, -{ - type Output = BTreeMultiSet; - - /// Combine two `BTreeMultiSet`s by removing elements - /// in the second multiset from the first. As with `remove()` - /// (and set difference), excess elements in the second - /// multiset are ignored. - /// - /// # Examples - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// use std::iter::FromIterator; - /// - /// let lhs: BTreeMultiSet = FromIterator::from_iter(vec![1,2,3]); - /// let rhs: BTreeMultiSet = FromIterator::from_iter(vec![1,1,4]); - /// let combined = lhs - rhs; - /// assert_eq!(0, combined.count_of(&1)); - /// assert_eq!(1, combined.count_of(&2)); - /// assert_eq!(1, combined.count_of(&3)); - /// assert_eq!(0, combined.count_of(&4)); - /// ``` - fn sub(self, rhs: BTreeMultiSet) -> BTreeMultiSet { - let mut ret = self.clone(); - for val in rhs.distinct_elements() { - let count = rhs.count_of(val); - ret.remove_times(val, count); - } - ret - } -} - -impl FromIterator for BTreeMultiSet -where - A: Ord, -{ - /// Creates a new `BTreeMultiSet` from the elements in an iterable. - /// - /// # Examples - /// - /// Count occurrences of each `char` in `"hello world"`: - /// - /// ``` - /// use multiset::BTreeMultiSet; - /// use std::iter::FromIterator; - /// - /// let vals = vec!['h','e','l','l','o',' ','w','o','r','l','d']; - /// let multiset: BTreeMultiSet = FromIterator::from_iter(vals); - /// assert_eq!(1, multiset.count_of(&'h')); - /// assert_eq!(3, multiset.count_of(&'l')); - /// assert_eq!(0, multiset.count_of(&'z')); - /// ``` - fn from_iter(iterable: T) -> BTreeMultiSet - where - T: IntoIterator, - { - let mut multiset: BTreeMultiSet = BTreeMultiSet::new(); - for elem in iterable.into_iter() { - multiset.insert(elem); - } - multiset - } -} - -impl PartialEq for BTreeMultiSet -where - T: Ord, -{ - fn eq(&self, other: &BTreeMultiSet) -> bool { - if self.len() != other.len() { - return false; - } - - self.elem_counts - .iter() - .all(|(key, count)| other.contains(key) && other.elem_counts.get(key).unwrap() == count) - } -} - -impl Eq for BTreeMultiSet where T: Ord {} - -impl fmt::Debug for BTreeMultiSet -where - T: Ord + fmt::Debug, -{ - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_set().entries(self.iter()).finish() - } -} - -#[cfg(test)] -mod test_multiset { - use super::BTreeMultiSet; - - #[test] - fn test_iterate() { - let mut a = BTreeMultiSet::new(); - for i in 0..16 { - a.insert(i); - } - for i in 0..8 { - a.insert(i); - } - for i in 0..4 { - a.insert(i); - } - let mut observed: u16 = 0; - let mut observed_twice: u16 = 0; - let mut observed_thrice: u16 = 0; - for k in a.iter() { - let bit = 1 << *k; - if observed & bit == 0 { - observed |= bit; - } else if observed_twice & bit == 0 { - observed_twice |= bit; - } else if observed_thrice & bit == 0 { - observed_thrice |= bit; - } - } - assert_eq!(observed, 0xFFFF); - assert_eq!(observed_twice, 0xFF); - assert_eq!(observed_thrice, 0xF); - } - - #[test] - fn test_eq() { - let mut s1 = BTreeMultiSet::new(); - s1.insert(0); - s1.insert(1); - s1.insert(1); - let mut s2 = BTreeMultiSet::new(); - s2.insert(0); - s2.insert(1); - assert!(s1 != s2); - s2.insert(1); - assert_eq!(s1, s2); - } - - #[test] - fn test_size() { - let mut set = BTreeMultiSet::new(); - - assert_eq!(set.len(), 0); - set.insert('a'); - assert_eq!(set.len(), 1); - set.remove(&'a'); - assert_eq!(set.len(), 0); - - set.insert_times('b', 4); - assert_eq!(set.len(), 4); - set.insert('b'); - assert_eq!(set.len(), 5); - set.remove_all(&'b'); - assert_eq!(set.len(), 0); - - set.insert_times('c', 6); - assert_eq!(set.len(), 6); - set.insert_times('c', 3); - assert_eq!(set.len(), 9); - set.insert('c'); - assert_eq!(set.len(), 10); - set.insert('d'); - assert_eq!(set.len(), 11); - set.insert_times('d', 3); - assert_eq!(set.len(), 14); - set.remove_all(&'c'); - assert_eq!(set.len(), 4); - set.remove(&'d'); - assert_eq!(set.len(), 3); - set.remove_times(&'d', 2); - assert_eq!(set.len(), 1); - set.remove(&'d'); - assert_eq!(set.len(), 0); - } -} diff --git a/src/hash_multiset.rs b/src/hash_multiset.rs index e439def..790cedc 100644 --- a/src/hash_multiset.rs +++ b/src/hash_multiset.rs @@ -84,10 +84,6 @@ where /// Returns `true` if the multiset contains a value. /// - /// The value may be any borrowed form of the set's value type, but - /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for - /// the value type. - /// /// # Examples /// /// ``` @@ -100,7 +96,7 @@ where pub fn contains(&self, value: &Q) -> bool where K: Borrow, - Q: Hash + Eq, + Q: Eq + Hash, { self.elem_counts.contains_key(value) } From 924b6eead56c8f065fa2ba72ba4761c7d521547e Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 11 Jan 2020 14:35:35 +0100 Subject: [PATCH 6/9] Revert version bump --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bb1c5a4..2e370ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "multiset" -version = "0.1.0" +version = "0.0.5" repository = "https://github.com/jmitchell/multiset" description = "Multisets/bags" keywords = ["multiset","bag","data-structure","collection","count"] From 5d4c84decc235b1e6f769e81d80a82c96359b2d3 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 11 Jan 2020 14:36:15 +0100 Subject: [PATCH 7/9] Indent build.rs with spaces --- build.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/build.rs b/build.rs index b375ec4..9c1efb7 100644 --- a/build.rs +++ b/build.rs @@ -1,14 +1,14 @@ fn main() { - let btree_multiset_code = std::fs::read_to_string("./src/hash_multiset.rs") - .expect("Could not open hash_multiset source file") - .replace("Hash + Eq", "Ord") - .replace("Eq + Hash", "Ord") - .replace("hash_map::", "btree_map::") - .replace("HashMap", "BTreeMap") - .replace("HashMultiSet", "BTreeMultiSet") - .replace("use std::hash::Hash;\n", "") - .replace("hash-based multiset", "tree-based multiset"); - std::fs::write("./src/btree_multiset.rs", btree_multiset_code.as_bytes()) - .expect("Could not write btree_multiset file"); - println!("cargo:rerun-if-changed=./src/hash_multiset.rs"); + let btree_multiset_code = std::fs::read_to_string("./src/hash_multiset.rs") + .expect("Could not open hash_multiset source file") + .replace("Hash + Eq", "Ord") + .replace("Eq + Hash", "Ord") + .replace("hash_map::", "btree_map::") + .replace("HashMap", "BTreeMap") + .replace("HashMultiSet", "BTreeMultiSet") + .replace("use std::hash::Hash;\n", "") + .replace("hash-based multiset", "tree-based multiset"); + std::fs::write("./src/btree_multiset.rs", btree_multiset_code.as_bytes()) + .expect("Could not write btree_multiset file"); + println!("cargo:rerun-if-changed=./src/hash_multiset.rs"); } From 0b28b688b97f6e2763d3372b49eb9828a6f65a4c Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 11 Jan 2020 14:39:15 +0100 Subject: [PATCH 8/9] Derive clone --- src/iter.rs | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/src/iter.rs b/src/iter.rs index fa153e0..bf1d62b 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -14,6 +14,7 @@ use std::marker::PhantomData; /// /// This `struct` is created by the [`iter`](super::HashMultiSet::iter) method on /// [`HashMultiSet`](super::HashMultiSet) or [`BTreeMultiSet`](super::BTreeMultiSet). +#[derive(Clone)] pub struct Iter, InnerIter: Iterator> { pub(crate) iter: InnerIter, pub(crate) duplicate: Option<::Item>, @@ -40,24 +41,6 @@ impl, InnerIter: Iterator + ExactSizeI } } -impl, InnerIter: Iterator + Clone> Clone - for Iter -where - ::Item: Clone, -{ - fn clone(&self) -> Iter { - Iter { - iter: self.iter.clone(), - duplicate: self.duplicate.clone(), - duplicate_index: self.duplicate_index, - duplicate_back: self.duplicate_back.clone(), - duplicate_index_back: self.duplicate_index_back, - len: self.len, - _ghost: PhantomData, - } - } -} - impl, InnerIter: Iterator> Iterator for Iter { From 7d685302322a94dfe3521ed43f067ec924f5f782 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 11 Jan 2020 14:53:22 +0100 Subject: [PATCH 9/9] Add specialization tests --- Cargo.toml | 3 ++ tests/specializations.rs | 96 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 tests/specializations.rs diff --git a/Cargo.toml b/Cargo.toml index 2e370ce..3e7b6f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,6 @@ description = "Multisets/bags" keywords = ["multiset","bag","data-structure","collection","count"] license = "MIT/Apache-2.0" authors = ["Jake Mitchell "] + +[dev-dependencies] +quickcheck = "0.9.1" diff --git a/tests/specializations.rs b/tests/specializations.rs new file mode 100644 index 0000000..be6c009 --- /dev/null +++ b/tests/specializations.rs @@ -0,0 +1,96 @@ +#[macro_use] +extern crate quickcheck; + +use std::fmt::Debug; +use std::ops::BitXor; + +struct Unspecialized(I); +impl Iterator for Unspecialized +where + I: Iterator, +{ + type Item = I::Item; + + #[inline(always)] + fn next(&mut self) -> Option { + self.0.next() + } + + #[inline(always)] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } +} + +fn check_specialized<'a, V, IterItem, Iter, F>(iterator: &Iter, mapper: F) +where + V: Eq + Debug, + IterItem: 'a, + Iter: Iterator + Clone + 'a, + F: Fn(Box + 'a>) -> V, +{ + assert_eq!( + mapper(Box::new(Unspecialized(iterator.clone()))), + mapper(Box::new(iterator.clone())) + ) +} + +fn check_specialized_count_last_nth_sizeh<'a, IterItem, Iter>( + it: &Iter, + known_expected_size: Option, +) where + IterItem: 'a + Eq + Debug, + Iter: Iterator + Clone + 'a, +{ + let size = it.clone().count(); + if let Some(expected_size) = known_expected_size { + assert_eq!(size, expected_size); + } + check_specialized(it, |i| i.count()); + check_specialized(it, |i| i.last()); + for n in 0..size + 2 { + check_specialized(it, |mut i| i.nth(n)); + } + let mut it_sh = it.clone(); + for n in 0..size + 2 { + let len = it_sh.clone().count(); + let (min, max) = it_sh.size_hint(); + assert_eq!((size - n.min(size)), len); + assert!(min <= len); + if let Some(max) = max { + assert!(len <= max); + } + it_sh.next(); + } +} + +fn check_specialized_fold_xor<'a, IterItem, Iter>(it: &Iter) +where + IterItem: 'a + + BitXor + + Eq + + Debug + + BitXor<::Output, Output = ::Output> + + Clone, + ::Output: + BitXor::Output> + Eq + Debug + Clone, + Iter: Iterator + Clone + 'a, +{ + check_specialized(it, |mut i| { + let first = i.next().map(|f| f.clone() ^ (f.clone() ^ f)); + i.fold(first, |acc, v: IterItem| acc.map(move |a| v ^ a)) + }); +} + +fn hms_test(test_vec: Vec, known_expected_size: Option) { + let hms: multiset::HashMultiSet<_> = test_vec.into_iter().collect(); + let iter = hms.iter(); + check_specialized_count_last_nth_sizeh(&iter, known_expected_size.map(|x| x + 1)); + check_specialized_fold_xor(&iter) +} + +quickcheck! { + fn hms_test_qc(test_vec: Vec) -> () { + hms_test(test_vec, None) + } +}