From 1f9b516f111bf9f06a67094c52a84d515fea8cdd Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Wed, 13 Nov 2024 15:00:48 +0000 Subject: [PATCH] feat: implement `keys` and `values` on `StableBTreeMap` (#241) This adds `keys` and `values` to `StableBTreeMap`, bringing it more in line with the std `BTreeMap`. It also adds `keys_range` and `values_range`, which aren't exposed by the std `BTreeMap`. With the std map these aren't really needed because the keys and values are returned by reference, so it is still efficient to use `range` and then `map` to get the keys or the values. But with the `StableBTreeMap`, using the same approach would result in reading and deserializing both the keys and the values, only to throw one set of them away. --- benchmarks/src/btreemap.rs | 76 +++++++++--- canbench_results.yml | 56 ++++++++- src/btreemap.rs | 56 +++++++-- src/btreemap/iter.rs | 243 +++++++++++++++++++++++++++++++++++-- src/btreemap/proptests.rs | 44 ++++++- 5 files changed, 431 insertions(+), 44 deletions(-) diff --git a/benchmarks/src/btreemap.rs b/benchmarks/src/btreemap.rs index d280a167..e4c133ee 100644 --- a/benchmarks/src/btreemap.rs +++ b/benchmarks/src/btreemap.rs @@ -222,22 +222,62 @@ pub fn btreemap_insert_10mib_values() -> BenchResult { #[bench(raw)] pub fn btreemap_iter_small_values() -> BenchResult { - iter_helper(10_000, 0) + iter_helper(10_000, 0, IterType::Iter) } #[bench(raw)] pub fn btreemap_iter_rev_small_values() -> BenchResult { - iter_rev_helper(10_000, 0) + iter_helper(10_000, 0, IterType::IterRev) } #[bench(raw)] pub fn btreemap_iter_10mib_values() -> BenchResult { - iter_helper(200, 10 * 1024) + iter_helper(200, 10 * 1024, IterType::Iter) } #[bench(raw)] pub fn btreemap_iter_rev_10mib_values() -> BenchResult { - iter_rev_helper(200, 10 * 1024) + iter_helper(200, 10 * 1024, IterType::IterRev) +} + +#[bench(raw)] +pub fn btreemap_keys_small_values() -> BenchResult { + iter_helper(10_000, 0, IterType::Keys) +} + +#[bench(raw)] +pub fn 
btreemap_keys_rev_small_values() -> BenchResult { + iter_helper(10_000, 0, IterType::KeysRev) +} + +#[bench(raw)] +pub fn btreemap_keys_10mib_values() -> BenchResult { + iter_helper(200, 10 * 1024, IterType::Keys) +} + +#[bench(raw)] +pub fn btreemap_keys_rev_10mib_values() -> BenchResult { + iter_helper(200, 10 * 1024, IterType::KeysRev) +} + +#[bench(raw)] +pub fn btreemap_values_small_values() -> BenchResult { + iter_helper(10_000, 0, IterType::Values) +} + +#[bench(raw)] +pub fn btreemap_values_rev_small_values() -> BenchResult { + iter_helper(10_000, 0, IterType::ValuesRev) +} + +#[bench(raw)] +pub fn btreemap_values_10mib_values() -> BenchResult { + iter_helper(200, 10 * 1024, IterType::Values) +} + +#[bench(raw)] +pub fn btreemap_values_rev_10mib_values() -> BenchResult { + iter_helper(200, 10 * 1024, IterType::ValuesRev) } #[bench(raw)] @@ -538,23 +578,20 @@ fn insert_helper( } // Profiles iterating over a btreemap. -fn iter_helper(size: u32, value_size: u32) -> BenchResult { +fn iter_helper(size: u32, value_size: u32, iter_type: IterType) -> BenchResult { let mut btree = BTreeMap::new(DefaultMemoryImpl::default()); for i in 0..size { btree.insert(i, vec![0u8; value_size as usize]); } - bench_fn(|| for _ in btree.iter() {}) -} - -// Profiles iterating in reverse over a btreemap. 
-fn iter_rev_helper(size: u32, value_size: u32) -> BenchResult { - let mut btree = BTreeMap::new(DefaultMemoryImpl::default()); - for i in 0..size { - btree.insert(i, vec![0u8; value_size as usize]); + match iter_type { + IterType::Iter => bench_fn(|| for _ in btree.iter() {}), + IterType::IterRev => bench_fn(|| for _ in btree.iter().rev() {}), + IterType::Keys => bench_fn(|| for _ in btree.keys() {}), + IterType::KeysRev => bench_fn(|| for _ in btree.keys().rev() {}), + IterType::Values => bench_fn(|| for _ in btree.values() {}), + IterType::ValuesRev => bench_fn(|| for _ in btree.values().rev() {}), } - - bench_fn(|| for _ in btree.iter().rev() {}) } // Profiles getting a large number of random blobs from a btreemap. @@ -630,3 +667,12 @@ fn remove_helper( } }) } + +enum IterType { + Iter, + IterRev, + Keys, + KeysRev, + Values, + ValuesRev, +} diff --git a/canbench_results.yml b/canbench_results.yml index 830313c6..9ee52cc3 100644 --- a/canbench_results.yml +++ b/canbench_results.yml @@ -385,13 +385,13 @@ benches: scopes: {} btreemap_iter_rev_10mib_values: total: - instructions: 25585550 + instructions: 25584039 heap_increase: 0 stable_memory_increase: 0 scopes: {} btreemap_iter_rev_small_values: total: - instructions: 23878236 + instructions: 23800315 heap_increase: 0 stable_memory_increase: 0 scopes: {} @@ -401,6 +401,30 @@ benches: heap_increase: 0 stable_memory_increase: 0 scopes: {} + btreemap_keys_10mib_values: + total: + instructions: 534290 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreemap_keys_rev_10mib_values: + total: + instructions: 595467 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreemap_keys_rev_small_values: + total: + instructions: 14369449 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreemap_keys_small_values: + total: + instructions: 11184261 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} btreemap_remove_blob_128_1024: total: instructions: 1916049094 @@ -533,6 +557,30 @@ 
benches: heap_increase: 0 stable_memory_increase: 0 scopes: {} + btreemap_values_10mib_values: + total: + instructions: 17277830 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreemap_values_rev_10mib_values: + total: + instructions: 17276742 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreemap_values_rev_small_values: + total: + instructions: 22619653 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreemap_values_small_values: + total: + instructions: 22560352 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} memory_manager_baseline: total: instructions: 1176577052 @@ -541,13 +589,13 @@ benches: scopes: {} memory_manager_grow: total: - instructions: 350727867 + instructions: 351687872 heap_increase: 2 stable_memory_increase: 32000 scopes: {} memory_manager_overhead: total: - instructions: 1182141676 + instructions: 1182143127 heap_increase: 0 stable_memory_increase: 8320 scopes: {} diff --git a/src/btreemap.rs b/src/btreemap.rs index 3bfa1bfd..c1c76025 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -51,6 +51,7 @@ mod allocator; mod iter; mod node; +use crate::btreemap::iter::{IterInternal, KeysIter, ValuesIter}; use crate::{ storable::Bound as StorableBound, types::{Address, NULL}, @@ -1006,15 +1007,54 @@ where /// Returns an iterator over the entries of the map, sorted by key. pub fn iter(&self) -> Iter { - Iter::new(self) + self.iter_internal().into() } /// Returns an iterator over the entries in the map where keys /// belong to the specified range. pub fn range(&self, key_range: impl RangeBounds) -> Iter { + self.range_internal(key_range).into() + } + + /// Returns an iterator pointing to the first element below the given bound. + /// Returns an empty iterator if there are no keys below the given bound. 
+ pub fn iter_upper_bound(&self, bound: &K) -> Iter { + if let Some((start_key, _)) = self.range(..bound).next_back() { + IterInternal::new_in_range(self, (Bound::Included(start_key), Bound::Unbounded)).into() + } else { + IterInternal::null(self).into() + } + } + + /// Returns an iterator over the keys of the map. + pub fn keys(&self) -> KeysIter { + self.iter_internal().into() + } + + /// Returns an iterator over the keys of the map which belong to the specified range. + pub fn keys_range(&self, key_range: impl RangeBounds) -> KeysIter { + self.range_internal(key_range).into() + } + + /// Returns an iterator over the values of the map, sorted by key. + pub fn values(&self) -> ValuesIter { + self.iter_internal().into() + } + + /// Returns an iterator over the values of the map where keys + /// belong to the specified range. + pub fn values_range(&self, key_range: impl RangeBounds) -> ValuesIter { + self.range_internal(key_range).into() + } + + fn iter_internal(&self) -> IterInternal { + IterInternal::new(self) + } + + fn range_internal(&self, key_range: impl RangeBounds) -> IterInternal { if self.root_addr == NULL { // Map is empty. - return Iter::null(self); + return IterInternal::null(self); } let range = ( @@ -1022,17 +1062,7 @@ where key_range.end_bound().cloned(), ); - Iter::new_in_range(self, range) - } - - /// Returns an iterator pointing to the first element below the given bound. - /// Returns an empty iterator if there are no keys below the given bound. - pub fn iter_upper_bound(&self, bound: &K) -> Iter { - if let Some((start_key, _)) = self.range(..bound).next_back() { - Iter::new_in_range(self, (Bound::Included(start_key), Bound::Unbounded)) - } else { - Iter::null(self) - } + IterInternal::new_in_range(self, range) } // Merges one node (`source`) into another (`into`), along with a median entry. 
diff --git a/src/btreemap/iter.rs b/src/btreemap/iter.rs index 848757c2..5c683585 100644 --- a/src/btreemap/iter.rs +++ b/src/btreemap/iter.rs @@ -20,7 +20,7 @@ pub(crate) enum Index { /// An iterator over the entries of a [`BTreeMap`]. #[must_use = "iterators are lazy and do nothing unless consumed"] -pub struct Iter<'a, K, V, M> +pub(crate) struct IterInternal<'a, K, V, M> where K: Storable + Ord + Clone, V: Storable, @@ -43,7 +43,7 @@ where range: (Bound, Bound), } -impl<'a, K, V, M> Iter<'a, K, V, M> +impl<'a, K, V, M> IterInternal<'a, K, V, M> where K: Storable + Ord + Clone, V: Storable, @@ -428,8 +428,22 @@ where } } } + + fn count(&mut self) -> usize { + let mut cnt = 0; + while self.next_map(|_, _| ()).is_some() { + cnt += 1; + } + cnt + } } +pub struct Iter<'a, K, V, M>(IterInternal<'a, K, V, M>) +where + K: Storable + Ord + Clone, + V: Storable, + M: Memory; + impl Iterator for Iter<'_, K, V, M> where K: Storable + Ord + Clone, @@ -439,8 +453,8 @@ where type Item = (K, V); fn next(&mut self) -> Option { - self.next_map(|node, entry_idx| { - let (key, encoded_value) = node.entry(entry_idx, self.map.memory()); + self.0.next_map(|node, entry_idx| { + let (key, encoded_value) = node.entry(entry_idx, self.0.map.memory()); (key, V::from_bytes(Cow::Owned(encoded_value))) }) } @@ -449,11 +463,7 @@ where where Self: Sized, { - let mut cnt = 0; - while self.next_map(|_, _| ()).is_some() { - cnt += 1; - } - cnt + self.0.count() } } @@ -464,13 +474,128 @@ where M: Memory, { fn next_back(&mut self) -> Option { - self.next_back_map(|node, entry_idx| { - let (key, encoded_value) = node.entry(entry_idx, self.map.memory()); + self.0.next_back_map(|node, entry_idx| { + let (key, encoded_value) = node.entry(entry_idx, self.0.map.memory()); (key, V::from_bytes(Cow::Owned(encoded_value))) }) } } +pub struct KeysIter<'a, K, V, M>(IterInternal<'a, K, V, M>) +where + K: Storable + Ord + Clone, + V: Storable, + M: Memory; + +impl Iterator for KeysIter<'_, K, V, M> +where + K: 
Storable + Ord + Clone, + V: Storable, + M: Memory, +{ + type Item = K; + + fn next(&mut self) -> Option { + self.0 + .next_map(|node, entry_idx| node.key(entry_idx).clone()) + } + + fn count(mut self) -> usize + where + Self: Sized, + { + self.0.count() + } +} + +impl DoubleEndedIterator for KeysIter<'_, K, V, M> +where + K: Storable + Ord + Clone, + V: Storable, + M: Memory, +{ + fn next_back(&mut self) -> Option { + self.0 + .next_back_map(|node, entry_idx| node.key(entry_idx).clone()) + } +} + +pub struct ValuesIter<'a, K, V, M>(IterInternal<'a, K, V, M>) +where + K: Storable + Ord + Clone, + V: Storable, + M: Memory; + +impl Iterator for ValuesIter<'_, K, V, M> +where + K: Storable + Ord + Clone, + V: Storable, + M: Memory, +{ + type Item = V; + + fn next(&mut self) -> Option { + self.0.next_map(|node, entry_idx| { + let encoded_value = node.value(entry_idx, self.0.map.memory()); + V::from_bytes(Cow::Borrowed(&encoded_value)) + }) + } + + fn count(mut self) -> usize + where + Self: Sized, + { + self.0.count() + } +} + +impl DoubleEndedIterator for ValuesIter<'_, K, V, M> +where + K: Storable + Ord + Clone, + V: Storable, + M: Memory, +{ + fn next_back(&mut self) -> Option { + self.0.next_back_map(|node, entry_idx| { + let encoded_value = node.value(entry_idx, self.0.map.memory()); + V::from_bytes(Cow::Borrowed(&encoded_value)) + }) + } +} + +impl<'a, K, V, M> From> for Iter<'a, K, V, M> +where + K: Storable + Ord + Clone, + V: Storable, + M: Memory, +{ + fn from(value: IterInternal<'a, K, V, M>) -> Self { + Iter(value) + } +} + +impl<'a, K, V, M> From> for KeysIter<'a, K, V, M> +where + K: Storable + Ord + Clone, + V: Storable, + M: Memory, +{ + fn from(value: IterInternal<'a, K, V, M>) -> Self { + KeysIter(value) + } +} + +impl<'a, K, V, M> From> for ValuesIter<'a, K, V, M> +where + K: Storable + Ord + Clone, + V: Storable, + M: Memory, +{ + fn from(value: IterInternal<'a, K, V, M>) -> Self { + ValuesIter(value) + } +} + #[cfg(test)] mod test { use super::*; 
@@ -635,4 +760,100 @@ mod test { assert!(iter.next().is_none()); assert!(iter.next_back().is_none()); } + + #[test] + fn keys_from_both_ends() { + let mem = make_memory(); + let mut btree = BTreeMap::new(mem); + + // Insert the elements in reverse order. + for i in (0..100u64).rev() { + btree.insert(i, i + 1); + } + + let mut iter = btree.keys(); + + for i in 0..50 { + let key = iter.next().unwrap(); + assert_eq!(key, i); + + let key = iter.next_back().unwrap(); + assert_eq!(key, 99 - i); + } + + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } + + #[test] + fn keys_range_from_both_ends() { + let mem = make_memory(); + let mut btree = BTreeMap::new(mem); + + // Insert the elements in reverse order. + for i in (0..100u64).rev() { + btree.insert(i, i + 1); + } + + let mut iter = btree.keys_range(30..70); + + for i in 0..20 { + let key = iter.next().unwrap(); + assert_eq!(key, 30 + i); + + let key = iter.next_back().unwrap(); + assert_eq!(key, 69 - i); + } + + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } + + #[test] + fn values_from_both_ends() { + let mem = make_memory(); + let mut btree = BTreeMap::new(mem); + + // Insert the elements in reverse order. + for i in (0..100u64).rev() { + btree.insert(i, i + 1); + } + + let mut iter = btree.values(); + + for i in 0..50 { + let value = iter.next().unwrap(); + assert_eq!(value, i + 1); + + let value = iter.next_back().unwrap(); + assert_eq!(value, 100 - i); + } + + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } + + #[test] + fn values_range_from_both_ends() { + let mem = make_memory(); + let mut btree = BTreeMap::new(mem); + + // Insert the elements in reverse order. 
+ for i in (0..100u64).rev() { + btree.insert(i, i + 1); + } + + let mut iter = btree.values_range(30..70); + + for i in 0..20 { + let value = iter.next().unwrap(); + assert_eq!(value, 31 + i); + + let value = iter.next_back().unwrap(); + assert_eq!(value, 70 - i); + } + + assert!(iter.next().is_none()); + assert!(iter.next_back().is_none()); + } } diff --git a/src/btreemap/proptests.rs b/src/btreemap/proptests.rs index f800fde6..e8bf8675 100644 --- a/src/btreemap/proptests.rs +++ b/src/btreemap/proptests.rs @@ -17,6 +17,8 @@ enum Operation { Insert { key: Vec, value: Vec }, Iter { from: usize, len: usize }, IterRev { from: usize, len: usize }, + Keys { from: usize, len: usize }, + Values { from: usize, len: usize }, Get(usize), Remove(usize), Range { from: usize, len: usize }, @@ -35,6 +37,10 @@ fn operation_strategy() -> impl Strategy { .prop_map(|(from, len)| Operation::Iter { from, len }), 5 => (any::(), any::()) .prop_map(|(from, len)| Operation::IterRev { from, len }), + 5 => (any::(), any::()) + .prop_map(|(from, len)| Operation::Keys { from, len }), + 5 => (any::(), any::()) + .prop_map(|(from, len)| Operation::Values { from, len }), 50 => (any::()).prop_map(Operation::Get), 15 => (any::()).prop_map(Operation::Remove), 5 => (any::(), any::()) @@ -224,7 +230,7 @@ fn execute_operation( let from = from % std_btree.len(); let len = len % std_btree.len(); - eprintln!("Iterate({}, {})", from, len); + eprintln!("IterateRev({}, {})", from, len); let std_iter = std_btree.iter().rev().skip(from).take(len); let mut stable_iter = btree.iter().rev().skip(from).take(len); for (k1, v1) in std_iter { @@ -234,6 +240,42 @@ fn execute_operation( } assert!(stable_iter.next().is_none()); } + Operation::Keys { from, len } => { + assert_eq!(std_btree.len(), btree.len() as usize); + if std_btree.is_empty() { + return; + } + + let from = from % std_btree.len(); + let len = len % std_btree.len(); + + eprintln!("Keys({}, {})", from, len); + let std_iter = 
std_btree.keys().skip(from).take(len); + let mut stable_iter = btree.keys().skip(from).take(len); + for k1 in std_iter { + let k2 = stable_iter.next().unwrap(); + assert_eq!(k1, &k2); + } + assert!(stable_iter.next().is_none()); + } + Operation::Values { from, len } => { + assert_eq!(std_btree.len(), btree.len() as usize); + if std_btree.is_empty() { + return; + } + + let from = from % std_btree.len(); + let len = len % std_btree.len(); + + eprintln!("Values({}, {})", from, len); + let std_iter = std_btree.values().skip(from).take(len); + let mut stable_iter = btree.values().skip(from).take(len); + for v1 in std_iter { + let v2 = stable_iter.next().unwrap(); + assert_eq!(v1, &v2); + } + assert!(stable_iter.next().is_none()); + } Operation::Get(idx) => { assert_eq!(std_btree.len(), btree.len() as usize); if std_btree.is_empty() {