From 0e5ad51604f8656c7c0ecc137750413bac0ffa94 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 9 Dec 2024 08:25:33 -0800 Subject: [PATCH 1/5] Drop aHash dependency in favor of hashbrown's choice of default hasher --- Cargo.toml | 3 +-- src/intern.rs | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8140405..956dbbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,8 +20,7 @@ maintenance = { status = "actively-developed" } [dependencies] -ahash = "0.8.0" -hashbrown = { version = "0.15", default-features = false, features = ["inline-more"] } +hashbrown = { version = "0.15", default-features = false, features = ["default-hasher", "inline-more"] } [features] default = ["unified_diff"] diff --git a/src/intern.rs b/src/intern.rs index 62349d6..443a779 100644 --- a/src/intern.rs +++ b/src/intern.rs @@ -1,8 +1,8 @@ -use std::hash::Hash; +use std::hash::{BuildHasher as _, Hash, Hasher as _}; use std::ops::Index; -use ahash::RandomState; use hashbrown::hash_table::{Entry, HashTable}; +use hashbrown::DefaultHashBuilder as RandomState; /// A token represented as an interned integer. /// @@ -114,7 +114,7 @@ impl Interner { Interner { tokens: Vec::with_capacity(capacity), table: HashTable::with_capacity(capacity), - hasher: RandomState::new(), + hasher: RandomState::default(), } } @@ -126,11 +126,11 @@ impl Interner { /// Intern `token` and return a the interned integer pub fn intern(&mut self, token: T) -> Token { - let hash = self.hasher.hash_one(&token); + let hash = hash_one(&self.hasher, &token); match self.table.entry( hash, |&it| self.tokens[it.0 as usize] == token, - |&token| self.hasher.hash_one(&self.tokens[token.0 as usize]), + |&token| hash_one(&self.hasher, &self.tokens[token.0 as usize]), ) { Entry::Occupied(entry) => *entry.get(), Entry::Vacant(entry) => { @@ -155,14 +155,14 @@ impl Interner { if retained <= erased { self.table.clear(); for (i, token) in self.tokens[0..retained].iter().enumerate() { - let hash = self.hasher.hash_one(token); + let hash = hash_one(&self.hasher, &token); self.table.insert_unique(hash, Token(i as u32), |&token| { - self.hasher.hash_one(&self.tokens[token.0 as usize]) + hash_one(&self.hasher, &self.tokens[token.0 as usize]) }); } } else { for (i, token) in self.tokens[retained..].iter().enumerate() { - let hash = self.hasher.hash_one(token); + let hash = hash_one(&self.hasher, &token); match self .table .find_entry(hash, |token| token.0 == (retained + i) as u32) @@ -182,3 +182,12 @@ impl Index for Interner { &self.tokens[index.0 as usize] } } + +// TODO: remove in favor of BuildHasher::hash_one once compilers older than 1.71 +// no longer need to be supported. +// https://doc.rust-lang.org/std/hash/trait.BuildHasher.html#method.hash_one +fn hash_one(hasher_parameters: &RandomState, token: &T) -> u64 { + let mut hasher = hasher_parameters.build_hasher(); + token.hash(&mut hasher); + hasher.finish() +} From 9e90897a5e6f0e5a1d9685e5cf2548c8ca348dd3 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 9 Dec 2024 11:15:55 -0800 Subject: [PATCH 2/5] Raise required compiler to Rust 1.71 For std::hash::BuildHasher::hash_one. https://doc.rust-lang.org/std/hash/trait.BuildHasher.html#method.hash_one --- .github/workflows/ci.yml | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ce27de2..687e770 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: toolchain: - - "1.65" + - "1.71" - stable runs-on: ubuntu-latest steps: diff --git a/Cargo.toml b/Cargo.toml index 956dbbb..4a96d2c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "imara-diff" version = "0.1.7" edition = "2021" authors = ["pascalkuthe "] -rust-version = "1.61" +rust-version = "1.71" license = "Apache-2.0" description = "A high performance library for computing diffs." From 689694bed2465bdb3d08ad94c8b2b48e0138a0ef Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 9 Dec 2024 11:17:06 -0800 Subject: [PATCH 3/5] Use std::hash::BuildHasher::hash_one --- src/intern.rs | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/intern.rs b/src/intern.rs index 443a779..800fdca 100644 --- a/src/intern.rs +++ b/src/intern.rs @@ -1,4 +1,4 @@ -use std::hash::{BuildHasher as _, Hash, Hasher as _}; +use std::hash::{BuildHasher as _, Hash}; use std::ops::Index; use hashbrown::hash_table::{Entry, HashTable}; @@ -126,11 +126,11 @@ impl Interner { /// Intern `token` and return a the interned integer pub fn intern(&mut self, token: T) -> Token { - let hash = hash_one(&self.hasher, &token); + let hash = self.hasher.hash_one(&token); match self.table.entry( hash, |&it| self.tokens[it.0 as usize] == token, - |&token| hash_one(&self.hasher, &self.tokens[token.0 as usize]), + |&token| self.hasher.hash_one(&self.tokens[token.0 as usize]), ) { Entry::Occupied(entry) => *entry.get(), Entry::Vacant(entry) => { @@ -155,14 +155,14 @@ impl Interner { if retained <= erased { self.table.clear(); for (i, token) in self.tokens[0..retained].iter().enumerate() { - let hash = hash_one(&self.hasher, &token); + let hash = self.hasher.hash_one(token); self.table.insert_unique(hash, Token(i as u32), |&token| { - hash_one(&self.hasher, &self.tokens[token.0 as usize]) + self.hasher.hash_one(&self.tokens[token.0 as usize]) }); } } else { for (i, token) in self.tokens[retained..].iter().enumerate() { - let hash = hash_one(&self.hasher, &token); + let hash = self.hasher.hash_one(token); match self .table .find_entry(hash, |token| token.0 == (retained + i) as u32) @@ -182,12 +182,3 @@ impl Index for Interner { &self.tokens[index.0 as usize] } } - -// TODO: remove in favor of BuildHasher::hash_one once compilers older than 1.71 -// no longer need to be supported. -// https://doc.rust-lang.org/std/hash/trait.BuildHasher.html#method.hash_one -fn hash_one(hasher_parameters: &RandomState, token: &T) -> u64 { - let mut hasher = hasher_parameters.build_hasher(); - token.hash(&mut hasher); - hasher.finish() -} From 1edf8b3c7094d11fdd8bafdc757cc39a26418e07 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 9 Dec 2024 11:33:24 -0800 Subject: [PATCH 4/5] Resolve unnecessary_lazy_evaluations clippy lint warning: unnecessary closure used with `bool::then` --> src/myers/middle_snake.rs:248:9 | 248 | (best_score > 0).then(|| (best_token_idx1, best_token_idx2)) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_lazy_evaluations = note: `#[warn(clippy::unnecessary_lazy_evaluations)]` on by default help: use `then_some` instead | 248 | (best_score > 0).then_some((best_token_idx1, best_token_idx2)) | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- src/myers/middle_snake.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/myers/middle_snake.rs b/src/myers/middle_snake.rs index 9a33554..4b2ba1d 100644 --- a/src/myers/middle_snake.rs +++ b/src/myers/middle_snake.rs @@ -245,7 +245,7 @@ impl MiddleSnakeSearch { k -= 2; } - (best_score > 0).then(|| (best_token_idx1, best_token_idx2)) + (best_score > 0).then_some((best_token_idx1, best_token_idx2)) } } From 0bd0b9c02d03e4d07eecfe183175be19087f7c84 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 9 Dec 2024 11:34:01 -0800 Subject: [PATCH 5/5] Resolve derivable_impls clippy lint warning: this `impl` can be derived --> src/lib.rs:233:1 | 233 | / impl Default for Algorithm { 234 | | fn default() -> Self { 235 | | Algorithm::Histogram 236 | | } 237 | | } | |_^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#derivable_impls = note: `#[warn(clippy::derivable_impls)]` on by default help: replace the manual implementation with a derive attribute and mark the default variant | 175 + #[derive(Default)] 176 ~ pub enum Algorithm { 177 | /// A variation of the [`patience` diff algorithm described by Bram Cohen's blog post](https://bramcohen.livejournal.com/73318.html) ... 202 | /// be used instead. 203 ~ #[default] 204 ~ Histogram, | --- src/lib.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 19f0f5e..593f59b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -171,7 +171,7 @@ mod tests; /// `imara-diff` supports multiple different algorithms /// for computing an edit sequence. /// These algorithms have different performance and all produce different output. -#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)] pub enum Algorithm { /// A variation of the [`patience` diff algorithm described by Bram Cohen's blog post](https://bramcohen.livejournal.com/73318.html) /// that uses a histogram to find the least common LCS. @@ -199,6 +199,7 @@ pub enum Algorithm { /// fallback to Myers algorithm. However this detection has a nontrivial overhead, so /// if its known upfront that the sort of tokens is very small `Myers` algorithm should /// be used instead. + #[default] Histogram, /// An implementation of the linear space variant of /// [Myers `O((N+M)D)` algorithm](http://www.xmailserver.org/diff2.pdf). @@ -230,12 +231,6 @@ impl Algorithm { const ALL: [Self; 2] = [Algorithm::Histogram, Algorithm::Myers]; } -impl Default for Algorithm { - fn default() -> Self { - Algorithm::Histogram - } -} - /// Computes an edit-script that transforms `input.before` into `input.after` using /// the specified `algorithm` /// The edit-script is passed to `sink.process_change` while it is produced.