From e189d7060b5c1f0f39ad692db3cc3183b86f1041 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 16 Mar 2020 09:56:42 +0000 Subject: [PATCH 1/3] Implement clone_from for HashMap --- src/map.rs | 34 +++++++++++++++- src/raw/mod.rs | 105 +++++++++++++++++++++++++++++++++++-------------- src/set.rs | 13 +++++- 3 files changed, 120 insertions(+), 32 deletions(-) diff --git a/src/map.rs b/src/map.rs index 01946e2ba7..ce12da1a06 100644 --- a/src/map.rs +++ b/src/map.rs @@ -188,12 +188,29 @@ pub enum DefaultHashBuilder {} /// .iter().cloned().collect(); /// // use the values stored in map /// ``` -#[derive(Clone)] pub struct HashMap { pub(crate) hash_builder: S, pub(crate) table: RawTable<(K, V)>, } +impl Clone for HashMap { + fn clone(&self) -> Self { + HashMap { + hash_builder: self.hash_builder.clone(), + table: self.table.clone(), + } + } + + fn clone_from(&mut self, source: &Self) { + // We clone the hash_builder first since this might panic and we don't + // want the table to have elements hashed with the wrong hash_builder. + let hash_builder = source.hash_builder.clone(); + + self.table.clone_from(&source.table); + self.hash_builder = hash_builder; + } +} + #[cfg_attr(feature = "inline-more", inline)] pub(crate) fn make_hash(hash_builder: &impl BuildHasher, val: &K) -> u64 { let mut state = hash_builder.build_hasher(); @@ -2820,6 +2837,21 @@ mod test_map { assert_eq!(m2.len(), 2); } + #[test] + fn test_clone_from() { + let mut m = HashMap::new(); + let mut m2 = HashMap::new(); + assert_eq!(m.len(), 0); + assert!(m.insert(1, 2).is_none()); + assert_eq!(m.len(), 1); + assert!(m.insert(2, 4).is_none()); + assert_eq!(m.len(), 2); + m2.clone_from(&m); + assert_eq!(*m2.get(&1).unwrap(), 2); + assert_eq!(*m2.get(&2).unwrap(), 4); + assert_eq!(m2.len(), 2); + } + thread_local! { static DROP_VECTOR: RefCell> = RefCell::new(Vec::new()) } #[derive(Hash, PartialEq, Eq)] diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 8bc9641a2d..9e2f72466f 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -987,47 +987,92 @@ impl Clone for RawTable { .unwrap_or_else(|_| hint::unreachable_unchecked()), ); - // Copy the control bytes unchanged. We do this in a single pass - self.ctrl(0) - .copy_to_nonoverlapping(new_table.ctrl(0), self.num_ctrl_bytes()); - - { - // The cloning of elements may panic, in which case we need - // to make sure we drop only the elements that have been - // cloned so far. - let mut guard = guard((0, &mut new_table), |(index, new_table)| { - if mem::needs_drop::() { - for i in 0..=*index { - if is_full(*new_table.ctrl(i)) { - new_table.bucket(i).drop(); - } - } - } - new_table.free_buckets(); - }); + new_table.clone_from_impl(self, |new_table| { + // We need to free the memory allocated for the new table. + new_table.free_buckets(); + }); - for from in self.iter() { - let index = self.bucket_index(&from); - let to = guard.1.bucket(index); - to.write(from.as_ref().clone()); + // Return the newly created table. + ManuallyDrop::into_inner(new_table) + } + } + } - // Update the index in case we need to unwind. - guard.0 = index; + fn clone_from(&mut self, source: &Self) { + if source.is_empty_singleton() { + *self = Self::new(); + } else { + unsafe { + // First, drop all our elements without clearing the control bytes. + if mem::needs_drop::() { + for item in self.iter() { + item.drop(); } + } - // Successfully cloned all items, no need to clean up. - mem::forget(guard); + // If necessary, resize our table to match the source. + if self.buckets() != source.buckets() { + // Skip our drop by using ptr::write. + self.free_buckets(); + (self as *mut Self).write( + Self::new_uninitialized(source.buckets(), Fallibility::Infallible) + .unwrap_or_else(|_| hint::unreachable_unchecked()), + ); } - // Return the newly created table. - new_table.items = self.items; - new_table.growth_left = self.growth_left; - ManuallyDrop::into_inner(new_table) + self.clone_from_impl(source, |self_| { + // We need to leave the table in an empty state. + self_.clear_no_drop() + }); } } } } +impl RawTable { + /// Common code for clone and clone_from. Assumes `self.buckets() == source.buckets()`. + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn clone_from_impl(&mut self, source: &Self, mut on_panic: impl FnMut(&mut Self)) { + // Copy the control bytes unchanged. We do this in a single pass + source + .ctrl(0) + .copy_to_nonoverlapping(self.ctrl(0), self.num_ctrl_bytes()); + + // The cloning of elements may panic, in which case we need + // to make sure we drop only the elements that have been + // cloned so far. + let mut guard = guard((0, &mut *self), |(index, self_)| { + if mem::needs_drop::() { + for i in 0..=*index { + if is_full(*self_.ctrl(i)) { + self_.bucket(i).drop(); + } + } + } + + // Depending on whether we were called from clone or clone_from, we + // either need to free the memory for the destination table or just + // clear the control bytes. + on_panic(self_); + }); + + for from in source.iter() { + let index = source.bucket_index(&from); + let to = guard.1.bucket(index); + to.write(from.as_ref().clone()); + + // Update the index in case we need to unwind. + guard.0 = index; + } + + // Successfully cloned all items, no need to clean up. + mem::forget(guard); + + self.items = source.items; + self.growth_left = source.growth_left; + } +} + #[cfg(feature = "nightly")] unsafe impl<#[may_dangle] T> Drop for RawTable { #[cfg_attr(feature = "inline-more", inline)] diff --git a/src/set.rs b/src/set.rs index 537a10d770..e5a94c2de5 100644 --- a/src/set.rs +++ b/src/set.rs @@ -111,11 +111,22 @@ use super::map::{self, DefaultHashBuilder, HashMap, Keys}; /// [`HashMap`]: struct.HashMap.html /// [`PartialEq`]: https://doc.rust-lang.org/std/cmp/trait.PartialEq.html /// [`RefCell`]: https://doc.rust-lang.org/std/cell/struct.RefCell.html -#[derive(Clone)] pub struct HashSet { pub(crate) map: HashMap, } +impl Clone for HashSet { + fn clone(&self) -> Self { + HashSet { + map: self.map.clone(), + } + } + + fn clone_from(&mut self, source: &Self) { + self.map.clone_from(&source.map); + } +} + #[cfg(feature = "ahash")] impl HashSet { /// Creates an empty `HashSet`. From 4f905bdbc2783eb1dc4e8c6978b348105de12478 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 16 Mar 2020 10:58:28 +0000 Subject: [PATCH 2/3] Optimize Clone with specialization --- src/lib.rs | 1 + src/map.rs | 29 ++++++++++++++++- src/raw/mod.rs | 87 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 113 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7662f03092..63b180c1ac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,7 @@ test, core_intrinsics, dropck_eyepatch, + specialization, ) )] #![allow( diff --git a/src/map.rs b/src/map.rs index ce12da1a06..edb9401507 100644 --- a/src/map.rs +++ b/src/map.rs @@ -206,7 +206,34 @@ impl Clone for HashMap { // want the table to have elements hashed with the wrong hash_builder. let hash_builder = source.hash_builder.clone(); - self.table.clone_from(&source.table); + #[cfg(not(feature = "nightly"))] + { + self.table.clone_from(&source.table); + } + #[cfg(feature = "nightly")] + { + trait HashClone { + fn clone_from(&mut self, source: &Self, hash_builder: &S); + } + impl HashClone for HashMap { + default fn clone_from(&mut self, source: &Self, _hash_builder: &S) { + self.table.clone_from(&source.table); + } + } + impl HashClone for HashMap + where + K: Eq + Hash, + S: BuildHasher, + { + fn clone_from(&mut self, source: &Self, hash_builder: &S) { + self.table + .clone_from_with_hasher(&source.table, |x| make_hash(hash_builder, &x.0)); + } + } + HashClone::clone_from(self, source, &hash_builder); + } + + // Update hash_builder only if we successfully cloned all elements. self.hash_builder = hash_builder; } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 9e2f72466f..f22a63d4e2 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -987,7 +987,7 @@ impl Clone for RawTable { .unwrap_or_else(|_| hint::unreachable_unchecked()), ); - new_table.clone_from_impl(self, |new_table| { + new_table.clone_from_spec(self, |new_table| { // We need to free the memory allocated for the new table. new_table.free_buckets(); }); @@ -1013,14 +1013,16 @@ impl Clone for RawTable { // If necessary, resize our table to match the source. if self.buckets() != source.buckets() { // Skip our drop by using ptr::write. - self.free_buckets(); + if !self.is_empty_singleton() { + self.free_buckets(); + } (self as *mut Self).write( Self::new_uninitialized(source.buckets(), Fallibility::Infallible) .unwrap_or_else(|_| hint::unreachable_unchecked()), ); } - self.clone_from_impl(source, |self_| { + self.clone_from_spec(source, |self_| { // We need to leave the table in an empty state. self_.clear_no_drop() }); @@ -1029,6 +1031,40 @@ impl Clone for RawTable { } } +/// Specialization of `clone_from` for `Copy` types +trait RawTableClone { + unsafe fn clone_from_spec(&mut self, source: &Self, on_panic: impl FnMut(&mut Self)); +} +impl RawTableClone for RawTable { + #[cfg(feature = "nightly")] + #[cfg_attr(feature = "inline-more", inline)] + default unsafe fn clone_from_spec(&mut self, source: &Self, on_panic: impl FnMut(&mut Self)) { + self.clone_from_impl(source, on_panic); + } + + #[cfg(not(feature = "nightly"))] + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn clone_from_spec(&mut self, source: &Self, on_panic: impl FnMut(&mut Self)) { + self.clone_from_impl(source, on_panic); + } +} +#[cfg(feature = "nightly")] +impl RawTableClone for RawTable { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn clone_from_spec(&mut self, source: &Self, _on_panic: impl FnMut(&mut Self)) { + source + .ctrl(0) + .copy_to_nonoverlapping(self.ctrl(0), self.num_ctrl_bytes()); + source + .data + .as_ptr() + .copy_to_nonoverlapping(self.data.as_ptr(), self.buckets()); + + self.items = source.items; + self.growth_left = source.growth_left; + } +} + impl RawTable { /// Common code for clone and clone_from. Assumes `self.buckets() == source.buckets()`. #[cfg_attr(feature = "inline-more", inline)] @@ -1071,6 +1107,51 @@ impl RawTable { self.items = source.items; self.growth_left = source.growth_left; } + + /// Variant of `clone_from` to use when a hasher is available. + #[cfg(any(feature = "nightly", feature = "raw"))] + pub fn clone_from_with_hasher(&mut self, source: &Self, hasher: impl Fn(&T) -> u64) { + // If we have enough capacity in the table, just clear it and insert + // elements one by one. We don't do this if we have the same number of + // buckets as the source since we can just copy the contents directly + // in that case. + if self.buckets() != source.buckets() + && bucket_mask_to_capacity(self.bucket_mask) >= source.len() + { + self.clear(); + + let guard_self = guard(&mut *self, |self_| { + // Clear the partially copied table if a panic occurs, otherwise + // items and growth_left will be out of sync with the contents + // of the table. + self_.clear(); + }); + + unsafe { + for item in source.iter() { + // This may panic. + let item = item.as_ref().clone(); + let hash = hasher(&item); + + // We can use a simpler version of insert() here since: + // - there are no DELETED entries. + // - we know there is enough space in the table. + // - all elements are unique. + let index = guard_self.find_insert_slot(hash); + guard_self.set_ctrl(index, h2(hash)); + guard_self.bucket(index).write(item); + } + } + + // Successfully cloned all items, no need to clean up. + mem::forget(guard_self); + + self.items = source.items; + self.growth_left -= source.items; + } else { + self.clone_from(source); + } + } } #[cfg(feature = "nightly")] From c9b320e8aac06fdd0fb669d131c17970da394563 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 16 Mar 2020 12:02:24 +0000 Subject: [PATCH 3/3] Add benchmarks for clone --- benches/bench.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/benches/bench.rs b/benches/bench.rs index 4abdb17fb1..771e7169a5 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -206,3 +206,55 @@ bench_suite!( iter_ahash_random, iter_std_random ); + +#[bench] +fn clone_small(b: &mut Bencher) { + let mut m = HashMap::new(); + for i in 0..10 { + m.insert(i, i); + } + + b.iter(|| { + black_box(m.clone()); + }) +} + +#[bench] +fn clone_from_small(b: &mut Bencher) { + let mut m = HashMap::new(); + let mut m2 = HashMap::new(); + for i in 0..10 { + m.insert(i, i); + } + + b.iter(|| { + m2.clone_from(&m); + black_box(&mut m2); + }) +} + +#[bench] +fn clone_large(b: &mut Bencher) { + let mut m = HashMap::new(); + for i in 0..1000 { + m.insert(i, i); + } + + b.iter(|| { + black_box(m.clone()); + }) +} + +#[bench] +fn clone_from_large(b: &mut Bencher) { + let mut m = HashMap::new(); + let mut m2 = HashMap::new(); + for i in 0..1000 { + m.insert(i, i); + } + + b.iter(|| { + m2.clone_from(&m); + black_box(&mut m2); + }) +}