diff --git a/Cargo.toml b/Cargo.toml index e410fa6..1c9515f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ exclude = ["article/*"] [features] default = ["std"] std = [] +no-inlining = [] # Only relevant for throughput benchmarks bench-csv = [] bench-md = [] @@ -38,6 +39,7 @@ highway = "1.1.0" seahash = "4.1.0" metrohash = "1.0.6" fnv = "1.0.3" +aes_crypto = "1.2.0" [dev-dependencies.plotters] version = "0.3.5" diff --git a/README.md b/README.md index 3aabacb..7071bb3 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,9 @@ All generated hashes for a given version of GxHash are stable, meaning that for The `std` feature flag enables the `HashMap`/`HashSet` container convenience type aliases. This is on by default. Disable to make the crate `no_std`: ```toml -[dependencies.gxhash] +[dependencies] ... -default-features = false +gxhash = { version = "3", default-features = false, features = ["inlined"] } ``` ### `hybrid` @@ -121,7 +121,7 @@ GxHash is a non-cryptographic hashing algorithm, thus it is not recommended to u - Minor for API changes/removal - Patch for new APIs, bug fixes and performance improvements -> ℹ️ [cargo-show-asm](https://github.com/pacak/cargo-show-asm) is an easy way to view the actual generated assembly code (`cargo asm gxhash::gxhash::gxhash64`) (method `#[inline]` should be removed otherwise it won't be seen by the tool) +> ℹ️ [cargo-show-asm](https://github.com/pacak/cargo-show-asm) is an easy way to view the actual generated assembly code (`cargo asm gxhash::gxhash::gxhash64`). Eg: `cargo asm gxhash::gxhash::gxhash64 --lib --features no-inlining` (+ `| wc -l` to count lines, which is an approximate but quick way to get a sense of bytecode size). > ℹ️ [AMD μProf](https://www.amd.com/en/developer/uprof.html) gives some useful insights on time spent per instruction. 
## Publication diff --git a/benches/hashset.rs b/benches/hashset.rs index a247a3a..ec7fd6b 100644 --- a/benches/hashset.rs +++ b/benches/hashset.rs @@ -22,13 +22,13 @@ fn benchmark(c: &mut Criterion, name: &str, value: T) { let mut group = c.benchmark_group(format!("HashSet/{}", name)); - let mut set = HashSet::<T>::new(); - group.bench_function("Default Hasher", |b| { + let mut set: HashSet::<T> = gxhash::HashSet::<T>::default(); + group.bench_function("GxHash", |b| { iterate(b, &value, &mut set); }); - let mut set: HashSet::<T> = gxhash::HashSet::<T>::default(); - group.bench_function("GxHash", |b| { + let mut set = HashSet::<T>::new(); + group.bench_function("Default Hasher", |b| { iterate(b, &value, &mut set); }); diff --git a/changes.txt b/changes.txt new file mode 100644 index 0000000..794bc12 --- /dev/null +++ b/changes.txt @@ -0,0 +1,39 @@ +# GxHash 3.X + +Bytecode: 201 +HashSet/u32/GxHash: 1.5724 ns +Throughput: + | 4 > 6278.58 + | 8 > 12620.74 + | 16 > 25315.13 + | 32 > 26450.76 + | 64 > 39590.37 + | 128 > 39402.75 + | 256 > 52222.14 + | 512 > 63567.70 + | 1024 > 71014.10 + | 2048 > 74969.55 + | 4096 > 80239.42 + | 8192 > 83975.67 + | 16384 > 82638.05 + | 32768 > 84528.13 + +# GxHash 4 (WIP) + +Bytecode: 190 +HashSet/u32/GxHash: 1.5426 ns +Throughput: + | 4 > 7360.11 + | 8 > 14769.95 + | 16 > 29555.45 + | 32 > 43083.63 + | 64 > 43083.63 + | 128 > 40690.10 + | 256 > 50511.85 + | 512 > 62827.61 + | 1024 > 70250.75 + | 2048 > 81630.13 + | 4096 > 87250.16 + | 8192 > 89831.86 + | 16384 > 88241.06 + | 32768 > 89616.23 \ No newline at end of file diff --git a/src/gxhash/mod.rs b/src/gxhash/mod.rs index 7fff5ed..fad3d1b 100644 --- a/src/gxhash/mod.rs +++ b/src/gxhash/mod.rs @@ -28,6 +28,7 @@ pub fn gxhash32(input: &[u8], seed: i64) -> u32 { /// let seed = 1234; /// println!("Hash is {:x}!", gxhash::gxhash64(&bytes, seed)); /// ``` +#[cfg(not(feature = "no-inlining"))] #[inline(always)] pub fn gxhash64(input: &[u8], seed: i64) -> u64 { unsafe { @@ -36,6 +37,15 @@ pub fn 
gxhash64(input: &[u8], seed: i64) -> u64 { } } +#[cfg(feature = "no-inlining")] +#[inline(never)] +pub fn gxhash64(input: &[u8], seed: i64) -> u64 { + unsafe { + let p = &gxhash(input, create_seed(seed)) as *const State as *const u64; + *p + } +} + /// Hashes an arbitrary stream of bytes to an u128. /// /// # Example /// @@ -67,23 +77,23 @@ pub(crate) use load_unaligned; #[inline(always)] pub(crate) unsafe fn gxhash(input: &[u8], seed: State) -> State { - finalize(aes_encrypt(compress_all(input), seed)) + finalize(compress_all(input, seed)) } #[inline(always)] -pub(crate) unsafe fn compress_all(input: &[u8]) -> State { +pub(crate) unsafe fn compress_all(input: &[u8], seed: State) -> State { let len = input.len(); let mut ptr = input.as_ptr() as *const State; if len == 0 { - return create_empty(); + return seed; } if len <= VECTOR_SIZE { // Input fits on a single SIMD vector, however we might read beyond the input message // Thus we need this safe method that checks if it can safely read beyond or must copy - return get_partial(ptr, len); + return xor(get_partial(ptr, len), seed); } let mut hash_vector: State; @@ -102,6 +112,8 @@ pub(crate) unsafe fn compress_all(input: &[u8]) -> State { ptr = ptr.cast::<u8>().add(extra_bytes_count).cast(); } + hash_vector = xor(hash_vector, seed); + load_unaligned!(ptr, v0); if len > VECTOR_SIZE * 2 { @@ -152,6 +164,8 @@ unsafe fn compress_many(mut ptr: *const State, end: usize, hash_vector: State, l #[cfg(test)] mod tests { + use crate::gxhash; + use super::*; use rand::Rng; @@ -213,14 +227,64 @@ mod tests { assert_ne!(0, gxhash32(&[0u8; 1200], 0)); } + // #[test] + // fn is_stable() { + // assert_eq!(2533353535, gxhash32(&[0u8; 0], 0)); + // assert_eq!(4243413987, gxhash32(&[0u8; 1], 0)); + // assert_eq!(2401749549, gxhash32(&[0u8; 1000], 0)); + // assert_eq!(4156851105, gxhash32(&[42u8; 4242], 42)); + // assert_eq!(1981427771, gxhash32(&[42u8; 4242], -42)); + // assert_eq!(1156095992, gxhash32(b"Hello World", i64::MAX)); + // 
assert_eq!(540827083, gxhash32(b"Hello World", i64::MIN)); + // } + #[test] - fn is_stable() { - assert_eq!(2533353535, gxhash32(&[0u8; 0], 0)); - assert_eq!(4243413987, gxhash32(&[0u8; 1], 0)); - assert_eq!(2401749549, gxhash32(&[0u8; 1000], 0)); - assert_eq!(4156851105, gxhash32(&[42u8; 4242], 42)); - assert_eq!(1981427771, gxhash32(&[42u8; 4242], -42)); - assert_eq!(1156095992, gxhash32(b"Hello World", i64::MAX)); - assert_eq!(540827083, gxhash32(b"Hello World", i64::MIN)); + fn issue_83_multicollision() { + + let zero_key = aes_crypto::AesBlock::zero(); + + let mut s0 = [0u8; 192]; + let mut s1 = [0u8; 192]; + + s0[64] = 100; + s1[64] = 42; + + let v0 = aes_crypto::AesBlock::new(s0[64..64 + 16].try_into().unwrap()); + v0.enc(zero_key).store_to(&mut s0[64 + 32..]); + + let v0 = aes_crypto::AesBlock::new(s1[64..64 + 16].try_into().unwrap()); + v0.enc(zero_key).store_to(&mut s1[64 + 32..]); + + // Different strings. + assert!(s0 != s1); + + // Hashes must not collide, regardless of seed. + assert!(gxhash::gxhash128(&s0, 0) != gxhash::gxhash128(&s1, 0)); + assert!(gxhash::gxhash128(&s0, 0xdeadbeef) != gxhash::gxhash128(&s1, 0xdeadbeef)); + } + + #[test] + fn issue_83_multicollision_dec() { + + let zero_key = aes_crypto::AesBlock::zero(); + + let mut s0 = [0u8; 192]; + let mut s1 = [0u8; 192]; + + s0[64] = 100; + s1[64] = 42; + + let v0 = aes_crypto::AesBlock::new(s0[64..64 + 16].try_into().unwrap()); + v0.dec(zero_key).store_to(&mut s0[64 + 32..]); + + let v0 = aes_crypto::AesBlock::new(s1[64..64 + 16].try_into().unwrap()); + v0.dec(zero_key).store_to(&mut s1[64 + 32..]); + + // Different strings. + assert!(s0 != s1); + + // Hashes must not collide, regardless of seed. 
+ assert!(gxhash::gxhash128(&s0, 0) != gxhash::gxhash128(&s1, 0)); + assert!(gxhash::gxhash128(&s0, 0xdeadbeef) != gxhash::gxhash128(&s1, 0xdeadbeef)); } } diff --git a/src/gxhash/platform/arm.rs b/src/gxhash/platform/arm.rs index c7a550e..b23890c 100644 --- a/src/gxhash/platform/arm.rs +++ b/src/gxhash/platform/arm.rs @@ -64,6 +64,12 @@ pub unsafe fn aes_encrypt_last(data: State, keys: State) -> State { vreinterpretq_s8_u8(veorq_u8(encrypted, vreinterpretq_u8_s8(keys))) } +#[inline(always)] +// See https://blog.michaelbrase.com/2018/05/08/emulating-x86-aes-intrinsics-on-armv8-a +pub unsafe fn xor(a: State, b: State) -> State { + veorq_s8(a, b) +} + #[inline(always)] pub unsafe fn ld(array: *const u32) -> State { vreinterpretq_s8_u32(vld1q_u32(array)) @@ -72,10 +78,6 @@ pub unsafe fn ld(array: *const u32) -> State { #[inline(always)] pub unsafe fn compress_8(mut ptr: *const State, end_address: usize, hash_vector: State, len: usize) -> State { - // Disambiguation vectors - let mut t1: State = create_empty(); - let mut t2: State = create_empty(); - // Hash is processed in two separate 128-bit parallel lanes // This allows the same processing to be applied using 256-bit V-AES instrinsics // so that hashes are stable in both cases. 
@@ -86,8 +88,11 @@ pub unsafe fn compress_8(mut ptr: *const State, end_address: usize, hash_vector: crate::gxhash::load_unaligned!(ptr, v0, v1, v2, v3, v4, v5, v6, v7); - let mut tmp1 = aes_encrypt(v0, v2); - let mut tmp2 = aes_encrypt(v1, v3); + let mut tmp1 = aes_encrypt(v0, hash_vector); + let mut tmp2 = aes_encrypt(v1, hash_vector); + + tmp1 = aes_encrypt(tmp1, v2); + tmp2 = aes_encrypt(tmp2, v3); tmp1 = aes_encrypt(tmp1, v4); tmp2 = aes_encrypt(tmp2, v5); @@ -95,11 +100,8 @@ pub unsafe fn compress_8(mut ptr: *const State, end_address: usize, hash_vector: tmp1 = aes_encrypt(tmp1, v6); tmp2 = aes_encrypt(tmp2, v7); - t1 = vaddq_s8(t1, ld(KEYS.as_ptr())); - t2 = vaddq_s8(t2, ld(KEYS.as_ptr().offset(4))); - - lane1 = aes_encrypt_last(aes_encrypt(tmp1, t1), lane1); - lane2 = aes_encrypt_last(aes_encrypt(tmp2, t2), lane2); + lane1 = aes_encrypt_last(tmp1, lane1); + lane2 = aes_encrypt_last(tmp2, lane2); } // For 'Zeroes' test let len_vec = vreinterpretq_s8_u32(vdupq_n_u32(len as u32)); diff --git a/src/hasher.rs b/src/hasher.rs index 88ca99e..ad040fe 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -94,7 +94,7 @@ macro_rules! write { #[inline] fn $name(&mut self, value: $type) { self.state = unsafe { - aes_encrypt_last($load(value), aes_encrypt(self.state, ld(KEYS.as_ptr()))) + aes_encrypt(self.state, $load(value)) }; } } @@ -112,7 +112,7 @@ impl Hasher for GxHasher { #[inline] fn write(&mut self, bytes: &[u8]) { // Improvement: only compress at this stage and finalize in finish - self.state = unsafe { aes_encrypt_last(compress_all(bytes), aes_encrypt(self.state, ld(KEYS.as_ptr()))) }; + self.state = unsafe { compress_all(bytes, self.state) }; } write!(write_u8, u8, load_u8);