From a083dd653af0f7f46ba6058ab51e1f9d6a2aca7d Mon Sep 17 00:00:00 2001 From: The8472 Date: Fri, 5 Nov 2021 01:07:23 +0100 Subject: [PATCH] optimize Hash for Path Hashing does not have to use the whole Components parsing machinery because we only need it to match the normalizations that Components does. * stripping redundant separators -> skipping separators * stripping redundant '.' directories -> skipping '.' following after a separator That's all it takes. And instead of hashing individual slices for each component we feed the bytes directly into the hasher which avoids hashing the length of each component in addition to its contents. --- library/std/src/path.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/library/std/src/path.rs b/library/std/src/path.rs index dc0c735a06c6f..aca3a42f20ad1 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -2873,9 +2873,35 @@ impl cmp::PartialEq for Path { #[stable(feature = "rust1", since = "1.0.0")] impl Hash for Path { fn hash(&self, h: &mut H) { - for component in self.components() { - component.hash(h); + let bytes = self.as_u8_slice(); + + let mut component_start = 0; + let mut bytes_hashed = 0; + + for i in 0..bytes.len() { + if is_sep_byte(bytes[i]) { + if i > component_start { + let to_hash = &bytes[component_start..i]; + h.write(to_hash); + bytes_hashed += to_hash.len(); + } + + // skip over separator and optionally a following CurDir item + // since components() would normalize these away + component_start = i + match bytes[i..] { + [_, b'.', b'/', ..] | [_, b'.'] => 2, + _ => 1, + }; + } + } + + if component_start < bytes.len() { + let to_hash = &bytes[component_start..]; + h.write(to_hash); + bytes_hashed += to_hash.len(); } + + h.write_usize(bytes_hashed); } }