From cfe9f6fbbf49d91deaae063faa84da3247862011 Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Wed, 8 Jan 2025 21:15:42 -0300 Subject: [PATCH 01/13] Revert "Remove crossbeam-utils reference." This reverts commit a71499af129608608be35bfb1caee615b66ab226. --- src/encoding.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/encoding.rs b/src/encoding.rs index 3843c99..311d133 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -158,10 +158,8 @@ pub fn num_len(number: u32) -> u32 { #[cfg(test)] mod tests { - use crate::config::Config; - use crate::context::Context; use crate::decoded::Decoded; - use crate::encoding::{base64_len, decode_string, encode_string, num_len}; + use crate::encoding::{base64_len, decode_string, num_len}; use crate::error::Error; use crate::variant::Variant; use crate::version::Version; @@ -357,6 +355,7 @@ mod tests { assert_eq!(result, Err(Error::DecodingFail)); } + #[cfg(feature = "crossbeam-utils")] #[test] fn encode_string_returns_correct_string() { let hash = b"12345678901234567890123456789012".to_vec(); From 16b514d503e66c316cadc7a2c04ec0f6f02ed634 Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Wed, 8 Jan 2025 21:15:50 -0300 Subject: [PATCH 02/13] Revert "Remove mentions of ThreadMode." This reverts commit 06da41a75e1abdb1b700d94d3b341aa4bc3e4c93. 
--- README.md | 3 ++- src/argon2.rs | 6 ++++++ src/encoding.rs | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3c06db4..a6e6e76 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ assert!(matches); Create a password hash with custom settings and verify it: ```rust -use argon2::{self, Config, Variant, Version}; +use argon2::{self, Config, ThreadMode, Variant, Version}; let password = b"password"; let salt = b"othersalt"; @@ -49,6 +49,7 @@ let config = Config { mem_cost: 65536, time_cost: 10, lanes: 4, + thread_mode: ThreadMode::Parallel, secret: &[], ad: &[], hash_length: 32 diff --git a/src/argon2.rs b/src/argon2.rs index 15e5c01..fe4cf4e 100644 --- a/src/argon2.rs +++ b/src/argon2.rs @@ -213,6 +213,12 @@ mod tests { #[test] fn single_thread_verification_multi_lane_hash() { + /* + let hash = hash_encoded(b"foo", b"abcdefghijklmnopqrstuvwxyz", &Config { + lanes: 4, thread_mode: ThreadMode::Parallel, + ..Config::default() + }); + */ let hash = "$argon2i$v=19$m=4096,t=3,p=4$YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo$BvBk2OaSofBHfbrUW61nHrWB/43xgfs/QJJ5DkMAd8I"; verify_encoded(hash, b"foo").unwrap(); } diff --git a/src/encoding.rs b/src/encoding.rs index 311d133..198dc61 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -365,6 +365,7 @@ mod tests { lanes: 1, mem_cost: 4096, secret: &[], + thread_mode: ThreadMode::Parallel, time_cost: 3, variant: Variant::Argon2i, version: Version::Version13, From 65a42e49a76ca5cfdb6553aeed3f8a880739baba Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Wed, 8 Jan 2025 21:20:07 -0300 Subject: [PATCH 03/13] Revert "Remove parallel execution to prevent UB." This reverts commit 3b6dcc197ebb48bc0b5b9efee49e509b57e2bd74. 
--- Cargo.toml | 4 +++ src/argon2.rs | 37 ++++++++++++++++++---- src/config.rs | 29 +++++++++++++++++- src/context.rs | 2 ++ src/core.rs | 34 ++++++++++++++++++++- src/encoding.rs | 8 +++++ src/lib.rs | 15 +++++++-- src/memory.rs | 19 ++++++++++++ src/thread_mode.rs | 64 +++++++++++++++++++++++++++++++++++++++ tests/integration_test.rs | 6 +++- 10 files changed, 207 insertions(+), 11 deletions(-) create mode 100644 src/thread_mode.rs diff --git a/Cargo.toml b/Cargo.toml index fc6f989..6035f18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,10 +14,14 @@ keywords = ["argon2", "argon2d", "argon2i", "hash", "password"] [lib] name = "argon2" +[features] +default = ["crossbeam-utils"] + [dependencies] base64 = "0.21" blake2b_simd = "1.0" constant_time_eq = "0.3.0" +crossbeam-utils = { version = "0.8", optional = true } serde = { version = "1.0", optional = true, features=["derive"] } [dev-dependencies] diff --git a/src/argon2.rs b/src/argon2.rs index fe4cf4e..9a3fedb 100644 --- a/src/argon2.rs +++ b/src/argon2.rs @@ -12,6 +12,7 @@ use crate::core; use crate::encoding; use crate::memory::Memory; use crate::result::Result; +use crate::thread_mode::ThreadMode; use crate::variant::Variant; use crate::version::Version; @@ -73,16 +74,25 @@ pub fn encoded_len( /// ``` /// /// -/// Create an Argon2d encoded hash with 4 lanes: +/// Create an Argon2d encoded hash with 4 lanes and parallel execution: /// /// ``` -/// use argon2::{self, Config, Variant}; +/// use argon2::{self, Config, ThreadMode, Variant}; /// /// let pwd = b"password"; /// let salt = b"somesalt"; /// let mut config = Config::default(); /// config.variant = Variant::Argon2d; -/// config.lanes = 4; +#[cfg_attr(feature = "crossbeam-utils", doc = "config.lanes = 4;")] +#[cfg_attr( + feature = "crossbeam-utils", + doc = "config.thread_mode = ThreadMode::Parallel;" +)] +#[cfg_attr(not(feature = "crossbeam-utils"), doc = "config.lanes = 1;")] +#[cfg_attr( + not(feature = "crossbeam-utils"), + doc = "config.thread_mode = 
ThreadMode::Sequential;" +)] /// let encoded = argon2::hash_encoded(pwd, salt, &config).unwrap(); /// ``` pub fn hash_encoded(pwd: &[u8], salt: &[u8], config: &Config) -> Result { @@ -108,16 +118,25 @@ pub fn hash_encoded(pwd: &[u8], salt: &[u8], config: &Config) -> Result /// ``` /// /// -/// Create an Argon2d hash with 4 lanes: +/// Create an Argon2d hash with 4 lanes and parallel execution: /// /// ``` -/// use argon2::{self, Config, Variant}; +/// use argon2::{self, Config, ThreadMode, Variant}; /// /// let pwd = b"password"; /// let salt = b"somesalt"; /// let mut config = Config::default(); /// config.variant = Variant::Argon2d; -/// config.lanes = 4; +#[cfg_attr(feature = "crossbeam-utils", doc = "config.lanes = 4;")] +#[cfg_attr( + feature = "crossbeam-utils", + doc = "config.thread_mode = ThreadMode::Parallel;" +)] +#[cfg_attr(not(feature = "crossbeam-utils"), doc = "config.lanes = 1;")] +#[cfg_attr( + not(feature = "crossbeam-utils"), + doc = "config.thread_mode = ThreadMode::Sequential;" +)] /// let vec = argon2::hash_raw(pwd, salt, &config).unwrap(); /// ``` pub fn hash_raw(pwd: &[u8], salt: &[u8], config: &Config) -> Result> { @@ -160,12 +179,18 @@ pub fn verify_encoded(encoded: &str, pwd: &[u8]) -> Result { /// ``` pub fn verify_encoded_ext(encoded: &str, pwd: &[u8], secret: &[u8], ad: &[u8]) -> Result { let decoded = encoding::decode_string(encoded)?; + let threads = if cfg!(feature = "crossbeam-utils") { + decoded.parallelism + } else { + 1 + }; let config = Config { variant: decoded.variant, version: decoded.version, mem_cost: decoded.mem_cost, time_cost: decoded.time_cost, lanes: decoded.parallelism, + thread_mode: ThreadMode::from_threads(threads), secret, ad, hash_length: decoded.hash.len() as u32, diff --git a/src/config.rs b/src/config.rs index 9f8269f..f43acca 100644 --- a/src/config.rs +++ b/src/config.rs @@ -6,6 +6,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
+use crate::thread_mode::ThreadMode; use crate::variant::Variant; use crate::version::Version; @@ -14,7 +15,7 @@ use crate::version::Version; /// # Examples /// /// ``` -/// use argon2::{Config, Variant, Version}; +/// use argon2::{Config, ThreadMode, Variant, Version}; /// /// let config = Config::default(); /// assert_eq!(config.ad, &[]); @@ -24,6 +25,7 @@ use crate::version::Version; /// assert_eq!(config.secret, &[]); /// assert_eq!(config.time_cost, 2); /// assert_eq!(config.variant, Variant::Argon2id); +/// assert_eq!(config.thread_mode, ThreadMode::Sequential); /// assert_eq!(config.version, Version::Version13); /// ``` #[derive(Clone, Debug, PartialEq)] @@ -43,6 +45,9 @@ pub struct Config<'a> { /// The key. pub secret: &'a [u8], + /// The thread mode. + pub thread_mode: ThreadMode, + /// The number of passes. pub time_cost: u32, @@ -62,6 +67,7 @@ impl<'a> Config<'a> { lanes: 1, mem_cost: 4096, secret: &[], + thread_mode: ThreadMode::default(), time_cost: 3, variant: Variant::Argon2i, version: Version::Version13, @@ -76,6 +82,7 @@ impl<'a> Config<'a> { lanes: 1, mem_cost: 47104, secret: &[], + thread_mode: ThreadMode::default(), time_cost: 1, variant: Variant::Argon2id, version: Version::Version13, @@ -90,6 +97,7 @@ impl<'a> Config<'a> { lanes: 1, mem_cost: 19456, secret: &[], + thread_mode: ThreadMode::default(), time_cost: 2, variant: Variant::Argon2id, version: Version::Version13, @@ -104,6 +112,7 @@ impl<'a> Config<'a> { lanes: 1, mem_cost: 12288, secret: &[], + thread_mode: ThreadMode::default(), time_cost: 3, variant: Variant::Argon2id, version: Version::Version13, @@ -118,6 +127,7 @@ impl<'a> Config<'a> { lanes: 1, mem_cost: 9216, secret: &[], + thread_mode: ThreadMode::default(), time_cost: 4, variant: Variant::Argon2id, version: Version::Version13, @@ -132,6 +142,7 @@ impl<'a> Config<'a> { lanes: 1, mem_cost: 7168, secret: &[], + thread_mode: ThreadMode::default(), time_cost: 5, variant: Variant::Argon2id, version: Version::Version13, @@ -146,6 
+157,7 @@ impl<'a> Config<'a> { lanes: 1, mem_cost: 2097152, secret: &[], + thread_mode: ThreadMode::default(), time_cost: 1, variant: Variant::Argon2id, version: Version::Version13, @@ -160,11 +172,16 @@ impl<'a> Config<'a> { lanes: 1, mem_cost: 65536, secret: &[], + thread_mode: ThreadMode::default(), time_cost: 3, variant: Variant::Argon2id, version: Version::Version13, } } + + pub fn uses_sequential(&self) -> bool { + self.thread_mode == ThreadMode::Sequential || self.lanes == 1 + } } impl<'a> Default for Config<'a> { @@ -178,6 +195,7 @@ impl<'a> Default for Config<'a> { mod tests { use crate::config::Config; + use crate::thread_mode::ThreadMode; use crate::variant::Variant; use crate::version::Version; @@ -189,6 +207,7 @@ mod tests { assert_eq!(config.lanes, 1); assert_eq!(config.mem_cost, 19 * 1024); assert_eq!(config.secret, &[]); + assert_eq!(config.thread_mode, ThreadMode::Sequential); assert_eq!(config.time_cost, 2); assert_eq!(config.variant, Variant::Argon2id); assert_eq!(config.version, Version::Version13); @@ -202,6 +221,7 @@ mod tests { assert_eq!(config.lanes, 1); assert_eq!(config.mem_cost, 4096); assert_eq!(config.secret, &[]); + assert_eq!(config.thread_mode, ThreadMode::Sequential); assert_eq!(config.time_cost, 3); assert_eq!(config.variant, Variant::Argon2i); assert_eq!(config.version, Version::Version13); @@ -215,6 +235,7 @@ mod tests { assert_eq!(config.lanes, 1); assert_eq!(config.mem_cost, 46 * 1024); assert_eq!(config.secret, &[]); + assert_eq!(config.thread_mode, ThreadMode::Sequential); assert_eq!(config.time_cost, 1); assert_eq!(config.variant, Variant::Argon2id); assert_eq!(config.version, Version::Version13); @@ -228,6 +249,7 @@ mod tests { assert_eq!(config.lanes, 1); assert_eq!(config.mem_cost, 19 * 1024); assert_eq!(config.secret, &[]); + assert_eq!(config.thread_mode, ThreadMode::Sequential); assert_eq!(config.time_cost, 2); assert_eq!(config.variant, Variant::Argon2id); assert_eq!(config.version, Version::Version13); @@ -241,6 
+263,7 @@ mod tests { assert_eq!(config.lanes, 1); assert_eq!(config.mem_cost, 12 * 1024); assert_eq!(config.secret, &[]); + assert_eq!(config.thread_mode, ThreadMode::Sequential); assert_eq!(config.time_cost, 3); assert_eq!(config.variant, Variant::Argon2id); assert_eq!(config.version, Version::Version13); @@ -254,6 +277,7 @@ mod tests { assert_eq!(config.lanes, 1); assert_eq!(config.mem_cost, 9 * 1024); assert_eq!(config.secret, &[]); + assert_eq!(config.thread_mode, ThreadMode::Sequential); assert_eq!(config.time_cost, 4); assert_eq!(config.variant, Variant::Argon2id); assert_eq!(config.version, Version::Version13); @@ -267,6 +291,7 @@ mod tests { assert_eq!(config.lanes, 1); assert_eq!(config.mem_cost, 7 * 1024); assert_eq!(config.secret, &[]); + assert_eq!(config.thread_mode, ThreadMode::Sequential); assert_eq!(config.time_cost, 5); assert_eq!(config.variant, Variant::Argon2id); assert_eq!(config.version, Version::Version13); @@ -280,6 +305,7 @@ mod tests { assert_eq!(config.lanes, 1); assert_eq!(config.mem_cost, 2 * 1024 * 1024); assert_eq!(config.secret, &[]); + assert_eq!(config.thread_mode, ThreadMode::Sequential); assert_eq!(config.time_cost, 1); assert_eq!(config.variant, Variant::Argon2id); assert_eq!(config.version, Version::Version13); @@ -293,6 +319,7 @@ mod tests { assert_eq!(config.lanes, 1); assert_eq!(config.mem_cost, 64 * 1024); assert_eq!(config.secret, &[]); + assert_eq!(config.thread_mode, ThreadMode::Sequential); assert_eq!(config.time_cost, 3); assert_eq!(config.variant, Variant::Argon2id); assert_eq!(config.version, Version::Version13); diff --git a/src/context.rs b/src/context.rs index 390db1f..c777aa2 100644 --- a/src/context.rs +++ b/src/context.rs @@ -118,6 +118,7 @@ mod tests { use crate::config::Config; use crate::context::Context; use crate::error::Error; + use crate::thread_mode::ThreadMode; use crate::variant::Variant; use crate::version::Version; @@ -129,6 +130,7 @@ mod tests { lanes: 4, mem_cost: 4096, secret: b"secret", + 
thread_mode: ThreadMode::Sequential, time_cost: 3, variant: Variant::Argon2i, version: Version::Version13, diff --git a/src/core.rs b/src/core.rs index 92aa4ba..df2d52c 100644 --- a/src/core.rs +++ b/src/core.rs @@ -13,6 +13,8 @@ use crate::memory::Memory; use crate::variant::Variant; use crate::version::Version; use blake2b_simd::Params; +#[cfg(feature = "crossbeam-utils")] +use crossbeam_utils::thread::scope; /// Position of the block currently being operated on. #[derive(Clone, Debug)] @@ -30,7 +32,11 @@ pub fn initialize(context: &Context, memory: &mut Memory) { /// Fills all the memory blocks. pub fn fill_memory_blocks(context: &Context, memory: &mut Memory) { - fill_memory_blocks_st(context, memory); + if context.config.uses_sequential() { + fill_memory_blocks_st(context, memory); + } else { + fill_memory_blocks_mt(context, memory); + } } /// Calculates the final hash and returns it. @@ -177,6 +183,32 @@ fn fill_first_blocks(context: &Context, memory: &mut Memory, h0: &mut [u8]) { } } +#[cfg(feature = "crossbeam-utils")] +fn fill_memory_blocks_mt(context: &Context, memory: &mut Memory) { + for p in 0..context.config.time_cost { + for s in 0..common::SYNC_POINTS { + let _ = scope(|scoped| { + for (l, mem) in (0..context.config.lanes).zip(memory.as_lanes_mut()) { + let position = Position { + pass: p, + lane: l, + slice: s, + index: 0, + }; + scoped.spawn(move |_| { + fill_segment(context, &position, mem); + }); + } + }); + } + } +} + +#[cfg(not(feature = "crossbeam-utils"))] +fn fill_memory_blocks_mt(_: &Context, _: &mut Memory) { + unimplemented!() +} + fn fill_memory_blocks_st(context: &Context, memory: &mut Memory) { for p in 0..context.config.time_cost { for s in 0..common::SYNC_POINTS { diff --git a/src/encoding.rs b/src/encoding.rs index 198dc61..683b799 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -158,9 +158,17 @@ pub fn num_len(number: u32) -> u32 { #[cfg(test)] mod tests { + #[cfg(feature = "crossbeam-utils")] + use crate::config::Config; + 
#[cfg(feature = "crossbeam-utils")] + use crate::context::Context; use crate::decoded::Decoded; + #[cfg(feature = "crossbeam-utils")] + use crate::encoding::encode_string; use crate::encoding::{base64_len, decode_string, num_len}; use crate::error::Error; + #[cfg(feature = "crossbeam-utils")] + use crate::thread_mode::ThreadMode; use crate::variant::Variant; use crate::version::Version; diff --git a/src/lib.rs b/src/lib.rs index 43753b7..10bed0e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,7 +44,7 @@ //! Create a password hash with custom settings and verify it: //! //! ```rust -//! use argon2::{self, Config, Variant, Version}; +//! use argon2::{self, Config, ThreadMode, Variant, Version}; //! //! let password = b"password"; //! let salt = b"othersalt"; @@ -53,7 +53,16 @@ //! version: Version::Version13, //! mem_cost: 65536, //! time_cost: 10, -//! lanes: 4, +#![cfg_attr(feature = "crossbeam-utils", doc = " lanes: 4,")] +#![cfg_attr( + feature = "crossbeam-utils", + doc = " thread_mode: ThreadMode::Parallel," +)] +#![cfg_attr(not(feature = "crossbeam-utils"), doc = " lanes: 1,")] +#![cfg_attr( + not(feature = "crossbeam-utils"), + doc = " thread_mode: ThreadMode::Sequential," +)] //! secret: &[], //! ad: &[], //! hash_length: 32 @@ -85,6 +94,7 @@ mod encoding; mod error; mod memory; mod result; +mod thread_mode; mod variant; mod version; @@ -92,5 +102,6 @@ pub use crate::argon2::*; pub use crate::config::Config; pub use crate::error::Error; pub use crate::result::Result; +pub use crate::thread_mode::ThreadMode; pub use crate::variant::Variant; pub use crate::version::Version; diff --git a/src/memory.rs b/src/memory.rs index a7b1756..64581e5 100644 --- a/src/memory.rs +++ b/src/memory.rs @@ -32,6 +32,17 @@ impl Memory { let blocks = vec![Block::zero(); total].into_boxed_slice(); Memory { rows, cols, blocks } } + + #[cfg(feature = "crossbeam-utils")] + /// Gets the mutable lanes representation of the memory matrix. 
+ pub fn as_lanes_mut(&mut self) -> Vec<&mut Memory> { + let ptr: *mut Memory = self; + let mut vec = Vec::with_capacity(self.rows); + for _ in 0..self.rows { + vec.push(unsafe { &mut (*ptr) }); + } + vec + } } impl Debug for Memory { @@ -95,4 +106,12 @@ mod tests { assert_eq!(memory.cols, lane_length as usize); assert_eq!(memory.blocks.len(), 512); } + + #[cfg(feature = "crossbeam-utils")] + #[test] + fn as_lanes_mut_returns_correct_vec() { + let mut memory = Memory::new(4, 128); + let lanes = memory.as_lanes_mut(); + assert_eq!(lanes.len(), 4); + } } diff --git a/src/thread_mode.rs b/src/thread_mode.rs new file mode 100644 index 0000000..6259eb6 --- /dev/null +++ b/src/thread_mode.rs @@ -0,0 +1,64 @@ +// Copyright (c) 2017 Xidorn Quan +// Copyright (c) 2017 Martijn Rijkeboer +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/// The thread mode used to perform the hashing. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ThreadMode { + /// Run in one thread. + Sequential, + + #[cfg(feature = "crossbeam-utils")] + /// Run in the same number of threads as the number of lanes. + Parallel, +} + +impl ThreadMode { + #[cfg(feature = "crossbeam-utils")] + /// Create a thread mode from the threads count. 
+ pub fn from_threads(threads: u32) -> ThreadMode { + if threads > 1 { + ThreadMode::Parallel + } else { + ThreadMode::Sequential + } + } + + #[cfg(not(feature = "crossbeam-utils"))] + pub fn from_threads(threads: u32) -> ThreadMode { + assert_eq!(threads, 1); + Self::default() + } +} + +impl Default for ThreadMode { + fn default() -> ThreadMode { + ThreadMode::Sequential + } +} + +#[cfg(test)] +mod tests { + + use crate::thread_mode::ThreadMode; + + #[test] + fn default_returns_correct_thread_mode() { + assert_eq!(ThreadMode::default(), ThreadMode::Sequential); + } + + #[cfg(feature = "crossbeam-utils")] + #[test] + fn from_threads_returns_correct_thread_mode() { + assert_eq!(ThreadMode::from_threads(0), ThreadMode::Sequential); + assert_eq!(ThreadMode::from_threads(1), ThreadMode::Sequential); + assert_eq!(ThreadMode::from_threads(2), ThreadMode::Parallel); + assert_eq!(ThreadMode::from_threads(10), ThreadMode::Parallel); + assert_eq!(ThreadMode::from_threads(100), ThreadMode::Parallel); + } +} diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 80792be..0d6a3ce 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -8,7 +8,7 @@ // These tests are based on Argon's test.c test suite. 
-use argon2::{Config, Error, Variant, Version}; +use argon2::{Config, Error, ThreadMode, Variant, Version}; use hex::ToHex; #[cfg(not(debug_assertions))] @@ -1014,6 +1014,7 @@ fn test_hash_raw_with_not_enough_memory() { mem_cost: 2, time_cost: 2, lanes: 1, + thread_mode: ThreadMode::Sequential, secret: &[], ad: &[], hash_length: 32, @@ -1032,6 +1033,7 @@ fn test_hash_raw_with_too_short_salt() { mem_cost: 2048, time_cost: 2, lanes: 1, + thread_mode: ThreadMode::Sequential, secret: &[], ad: &[], hash_length: 32, @@ -1051,12 +1053,14 @@ fn hash_test( hex: &str, enc: &str, ) { + let threads = if cfg!(feature = "crossbeam-utils") { p } else { 1 }; let config = Config { variant: var, version: ver, mem_cost: m, time_cost: t, lanes: p, + thread_mode: ThreadMode::from_threads(threads), secret: &[], ad: &[], hash_length: 32, From 01b2bfea1f993bc29117c38347d1a6562281d667 Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Wed, 8 Jan 2025 23:17:33 -0300 Subject: [PATCH 04/13] Try to fix soundness problem by juggling Block pointers This is just a proof-of-concept. It seems to not immediately fail on Miri (like my first attempts), but I have yet to let it run to completion. 
--- src/core.rs | 22 +++++++++++++--------- src/memory.rs | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/src/core.rs b/src/core.rs index df2d52c..bae81d7 100644 --- a/src/core.rs +++ b/src/core.rs @@ -9,7 +9,7 @@ use crate::block::Block; use crate::common; use crate::context::Context; -use crate::memory::Memory; +use crate::memory::{Memory, UnsafeMemory}; use crate::variant::Variant; use crate::version::Version; use blake2b_simd::Params; @@ -185,10 +185,12 @@ fn fill_first_blocks(context: &Context, memory: &mut Memory, h0: &mut [u8]) { #[cfg(feature = "crossbeam-utils")] fn fill_memory_blocks_mt(context: &Context, memory: &mut Memory) { + let mem = UnsafeMemory::new(memory); + let mem = &mem; for p in 0..context.config.time_cost { for s in 0..common::SYNC_POINTS { let _ = scope(|scoped| { - for (l, mem) in (0..context.config.lanes).zip(memory.as_lanes_mut()) { + for l in 0..context.config.lanes { let position = Position { pass: p, lane: l, @@ -219,13 +221,13 @@ fn fill_memory_blocks_st(context: &Context, memory: &mut Memory) { slice: s, index: 0, }; - fill_segment(context, &position, memory); + fill_segment(context, &position, &UnsafeMemory::new(memory)); } } } } -fn fill_segment(context: &Context, position: &Position, memory: &mut Memory) { +fn fill_segment(context: &Context, position: &Position, memory: &UnsafeMemory) { let mut position = position.clone(); let data_independent_addressing = (context.config.variant == Variant::Argon2i) || (context.config.variant == Variant::Argon2id && position.pass == 0) @@ -280,7 +282,7 @@ fn fill_segment(context: &Context, position: &Position, memory: &mut Memory) { } pseudo_rand = address_block[(i % common::ADDRESSES_IN_BLOCK) as usize]; } else { - pseudo_rand = memory[prev_offset][0]; + pseudo_rand = unsafe { memory.get_block_unsafe(prev_offset as usize) }[0]; } // 1.2.2 Computing the lane of the reference block @@ -299,10 +301,12 @@ fn fill_segment(context: &Context, 
position: &Position, memory: &mut Memory) { // 2 Creating a new block let index = context.lane_length as u64 * ref_lane + ref_index as u64; - let mut curr_block = memory[curr_offset].clone(); + assert_ne!(curr_offset, prev_offset); + assert_ne!(curr_offset as usize, index as usize); + let mut curr_block = unsafe { memory.get_block_unsafe(curr_offset as usize) }.clone(); { - let prev_block = &memory[prev_offset]; - let ref_block = &memory[index]; + let prev_block = unsafe { memory.get_block_unsafe(prev_offset as usize) }; + let ref_block = unsafe { memory.get_block_unsafe(index as usize) }; if context.config.version == Version::Version10 || position.pass == 0 { fill_block(prev_block, ref_block, &mut curr_block, false); } else { @@ -310,7 +314,7 @@ fn fill_segment(context: &Context, position: &Position, memory: &mut Memory) { } } - memory[curr_offset] = curr_block; + unsafe { memory.set_block_unsafe(curr_offset as usize, curr_block) }; curr_offset += 1; prev_offset += 1; } diff --git a/src/memory.rs b/src/memory.rs index 64581e5..ede9317 100644 --- a/src/memory.rs +++ b/src/memory.rs @@ -7,6 +7,7 @@ // except according to those terms. use crate::block::Block; +use std::cell::UnsafeCell; use std::fmt; use std::fmt::Debug; use std::ops::{Index, IndexMut}; @@ -45,6 +46,40 @@ impl Memory { } } +pub struct UnsafeMemory { + inner: UnsafeCell<*mut [Block]>, +} + +impl UnsafeMemory { + pub fn new(mem: &mut Memory) -> UnsafeMemory { + UnsafeMemory { + inner: UnsafeCell::new(&mut *mem.blocks), + } + } + + /// # Safety + /// + /// The caller must ensure `mem` is valid, `index` is in bounds, and that no other references + /// exist to the requested block. 
+ #[cfg(feature = "crossbeam-utils")] + pub unsafe fn get_block_unsafe(&self, index: usize) -> &Block { + &*(*self.inner.get() as *const Block).offset(index.try_into().unwrap()) + } + + /// # Safety + /// + /// The caller must ensure `mem` is valid, `index` is in bounds, and that no other references + /// exist to the block to be replaced. + #[cfg(feature = "crossbeam-utils")] + pub unsafe fn set_block_unsafe(&self, index: usize, block: Block) { + *(*self.inner.get() as *mut Block).offset(index.try_into().unwrap()) = block; + } +} + +// SAFETY: we hope for the best, really. +unsafe impl Send for UnsafeMemory {} +unsafe impl Sync for UnsafeMemory {} + impl Debug for Memory { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "Memory {{ rows: {}, cols: {} }}", self.rows, self.cols) From b875184b1282fde0c778b1735070f6e69c123431 Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Thu, 9 Jan 2025 04:17:49 -0300 Subject: [PATCH 05/13] Review, simplify and add safety comments on unsafe code for parallelism --- src/core.rs | 56 +++++++++++++++++++++++------------ src/memory.rs | 82 +++++++++++++++++++++++++++++---------------------- 2 files changed, 84 insertions(+), 54 deletions(-) diff --git a/src/core.rs b/src/core.rs index bae81d7..a6a7d01 100644 --- a/src/core.rs +++ b/src/core.rs @@ -6,10 +6,12 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
+#![warn(unsafe_op_in_unsafe_fn)] + use crate::block::Block; use crate::common; use crate::context::Context; -use crate::memory::{Memory, UnsafeMemory}; +use crate::memory::{Memory, UnsafeBlocks}; use crate::variant::Variant; use crate::version::Version; use blake2b_simd::Params; @@ -185,8 +187,7 @@ fn fill_first_blocks(context: &Context, memory: &mut Memory, h0: &mut [u8]) { #[cfg(feature = "crossbeam-utils")] fn fill_memory_blocks_mt(context: &Context, memory: &mut Memory) { - let mem = UnsafeMemory::new(memory); - let mem = &mem; + let mem = &memory.as_unsafe_blocks(); for p in 0..context.config.time_cost { for s in 0..common::SYNC_POINTS { let _ = scope(|scoped| { @@ -198,7 +199,10 @@ fn fill_memory_blocks_mt(context: &Context, memory: &mut Memory) { index: 0, }; scoped.spawn(move |_| { - fill_segment(context, &position, mem); + // SAFETY: segments are processed slicewise and then each passed to a + // different thread. The threads synchronize when the `scope` is dropped. + // The arguments are correct by construction. + unsafe { fill_segment(context, &position, mem) }; }); } }); @@ -221,13 +225,21 @@ fn fill_memory_blocks_st(context: &Context, memory: &mut Memory) { slice: s, index: 0, }; - fill_segment(context, &position, &UnsafeMemory::new(memory)); + // SAFETY: segments are processed slicewise and then sequentially, and the + // arguments are correct by construction. + unsafe { fill_segment(context, &position, &memory.as_unsafe_blocks()) }; } } } } -fn fill_segment(context: &Context, position: &Position, memory: &UnsafeMemory) { +/// # Safety +/// +/// The memory must be processed slicewise, and with this function being called exactly once per +/// segment. If segments are filled in parallel, synchronization points are required between +/// slices. Finally, the supplied arguments must be consistent, otherwise offsets may be computed +/// incorrectly, leading to memory bugs. 
+unsafe fn fill_segment(context: &Context, position: &Position, memory: &UnsafeBlocks) { let mut position = position.clone(); let data_independent_addressing = (context.config.variant == Variant::Argon2i) || (context.config.variant == Variant::Argon2id && position.pass == 0) @@ -274,6 +286,10 @@ fn fill_segment(context: &Context, position: &Position, memory: &UnsafeMemory) { prev_offset = curr_offset - 1; } + // Ensure that offsets are in bounds for later `unsafe` operations. + assert!(curr_offset < context.memory_blocks); + assert!(prev_offset < context.memory_blocks); + // 1.2 Computing the index of the reference block // 1.2.1 Taking pseudo-random value from the previous block if data_independent_addressing { @@ -282,7 +298,10 @@ fn fill_segment(context: &Context, position: &Position, memory: &UnsafeMemory) { } pseudo_rand = address_block[(i % common::ADDRESSES_IN_BLOCK) as usize]; } else { - pseudo_rand = unsafe { memory.get_block_unsafe(prev_offset as usize) }[0]; + // SAFETY: `prev_offset` has been asserted to be in bounds, and the slicewise + // computation of Argon2 means that the referenced block shouldn't be mutably aliased + // or subject to a data race. 
+ pseudo_rand = unsafe { memory.get_unchecked(prev_offset as usize) }[0]; } // 1.2.2 Computing the lane of the reference block @@ -301,20 +320,19 @@ fn fill_segment(context: &Context, position: &Position, memory: &UnsafeMemory) { // 2 Creating a new block let index = context.lane_length as u64 * ref_lane + ref_index as u64; - assert_ne!(curr_offset, prev_offset); - assert_ne!(curr_offset as usize, index as usize); - let mut curr_block = unsafe { memory.get_block_unsafe(curr_offset as usize) }.clone(); - { - let prev_block = unsafe { memory.get_block_unsafe(prev_offset as usize) }; - let ref_block = unsafe { memory.get_block_unsafe(index as usize) }; - if context.config.version == Version::Version10 || position.pass == 0 { - fill_block(prev_block, ref_block, &mut curr_block, false); - } else { - fill_block(prev_block, ref_block, &mut curr_block, true); - } + assert!(index < context.memory_blocks as u64); + // SAFETY: `curr_offset`, `prev_offset` and `index` have been asserted to be in bounds and + // the slicewise computation of Argon2 means that there should be no mutable aliasing or + // data races. + let curr_block = unsafe { memory.get_mut_unchecked(curr_offset as usize) }; + let prev_block = unsafe { memory.get_unchecked(prev_offset as usize) }; + let ref_block = unsafe { memory.get_unchecked(index as usize) }; + if context.config.version == Version::Version10 || position.pass == 0 { + fill_block(prev_block, ref_block, curr_block, false); + } else { + fill_block(prev_block, ref_block, curr_block, true); } - unsafe { memory.set_block_unsafe(curr_offset as usize, curr_block) }; curr_offset += 1; prev_offset += 1; } diff --git a/src/memory.rs b/src/memory.rs index ede9317..bb3dc15 100644 --- a/src/memory.rs +++ b/src/memory.rs @@ -6,10 +6,13 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
+#![warn(unsafe_op_in_unsafe_fn)] + use crate::block::Block; use std::cell::UnsafeCell; use std::fmt; use std::fmt::Debug; +use std::marker::PhantomData; use std::ops::{Index, IndexMut}; /// Structure representing the memory matrix. @@ -34,51 +37,68 @@ impl Memory { Memory { rows, cols, blocks } } + /// Return a wrapped pointer to the flat array of blocks for parallel disjoint access. #[cfg(feature = "crossbeam-utils")] - /// Gets the mutable lanes representation of the memory matrix. - pub fn as_lanes_mut(&mut self) -> Vec<&mut Memory> { - let ptr: *mut Memory = self; - let mut vec = Vec::with_capacity(self.rows); - for _ in 0..self.rows { - vec.push(unsafe { &mut (*ptr) }); + pub fn as_unsafe_blocks(&mut self) -> UnsafeBlocks { + UnsafeBlocks { + inner: UnsafeCell::new(&mut *self.blocks), + lifetime: PhantomData, } - vec } } -pub struct UnsafeMemory { +/// Wrapped pointer to the flat array of blocks for parallel disjoint access. +/// +/// All operations are unchecked and require `unsafe`. +pub struct UnsafeBlocks<'a> { inner: UnsafeCell<*mut [Block]>, + lifetime: PhantomData<&'a mut [Block]>, } -impl UnsafeMemory { - pub fn new(mem: &mut Memory) -> UnsafeMemory { - UnsafeMemory { - inner: UnsafeCell::new(&mut *mem.blocks), - } - } - +impl UnsafeBlocks<'_> { + /// Get a shared reference to the `Block` at `index`. + /// /// # Safety /// - /// The caller must ensure `mem` is valid, `index` is in bounds, and that no other references - /// exist to the requested block. + /// The caller must ensure that `index` is in bounds, no mutable references exist or are + /// created to the corresponding block, and no data races happen while the returned reference + /// lives. 
#[cfg(feature = "crossbeam-utils")] - pub unsafe fn get_block_unsafe(&self, index: usize) -> &Block { - &*(*self.inner.get() as *const Block).offset(index.try_into().unwrap()) + pub unsafe fn get_unchecked(&self, index: usize) -> &Block { + // SAFETY: by construction, the pointer to the slice of blocks is dereferenceable. + let first_block: *const Block = unsafe { *self.inner.get() } as _; + // SAFETY: the caller promises that the `index` is in bounds; therefore, we're within + // the bounds of the allocated object, and the offset in bytes fits in an `isize`. + let ptr = unsafe { first_block.add(index) }; + // SAFETY: the caller promises that there are no mutable references or data races to + // mutate the requested `Block`. + unsafe { &*ptr } } + /// Get a mutable reference to the `Block` at `index`. + /// /// # Safety /// - /// The caller must ensure `mem` is valid, `index` is in bounds, and that no other references - /// exist to the block to be replaced. - #[cfg(feature = "crossbeam-utils")] - pub unsafe fn set_block_unsafe(&self, index: usize, block: Block) { - *(*self.inner.get() as *mut Block).offset(index.try_into().unwrap()) = block; + /// The caller must ensure that `index` is in bounds, no other references exist or are created + /// to the corresponding block, and no data races happen while the returned reference lives. + #[allow(clippy::mut_from_ref)] // TODO: check that this really is a false positive (Mutex?) + pub unsafe fn get_mut_unchecked(&self, index: usize) -> &mut Block { + // SAFETY: by construction, the pointer to the slice of blocks is dereferenceable. + let first_block: *mut Block = unsafe { *self.inner.get() } as _; + // SAFETY: the caller promises that the `index` is in bounds; therefore, we're within + // the bounds of the allocated object, and the offset in bytes fits in an `isize`. 
+ let ptr = unsafe { first_block.add(index) }; + // SAFETY: the caller promises that there are no other references or accesses, nor data + // races, that may access the requested `Block`. + unsafe { &mut *ptr } } } -// SAFETY: we hope for the best, really. -unsafe impl Send for UnsafeMemory {} -unsafe impl Sync for UnsafeMemory {} +// SAFETY: passing or sharing `UnsafeBlocks` accross threads is, in itself, safe. Using it isn't, +// as the user must ensure that there are no data races or mutable aliasing, but all of its methods +// already require `unsafe`. +unsafe impl Send for UnsafeBlocks<'_> {} +unsafe impl Sync for UnsafeBlocks<'_> {} impl Debug for Memory { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -141,12 +161,4 @@ mod tests { assert_eq!(memory.cols, lane_length as usize); assert_eq!(memory.blocks.len(), 512); } - - #[cfg(feature = "crossbeam-utils")] - #[test] - fn as_lanes_mut_returns_correct_vec() { - let mut memory = Memory::new(4, 128); - let lanes = memory.as_lanes_mut(); - assert_eq!(lanes.len(), 4); - } } From 05591f2cbc70dcd3fe8cdc2e61cd6867174afecb Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Thu, 9 Jan 2025 05:03:19 -0300 Subject: [PATCH 06/13] Remove UnsafeCell wrapping just a pointer --- src/memory.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/memory.rs b/src/memory.rs index bb3dc15..298f31e 100644 --- a/src/memory.rs +++ b/src/memory.rs @@ -9,7 +9,6 @@ #![warn(unsafe_op_in_unsafe_fn)] use crate::block::Block; -use std::cell::UnsafeCell; use std::fmt; use std::fmt::Debug; use std::marker::PhantomData; @@ -39,10 +38,10 @@ impl Memory { /// Return a wrapped pointer to the flat array of blocks for parallel disjoint access. 
#[cfg(feature = "crossbeam-utils")] - pub fn as_unsafe_blocks(&mut self) -> UnsafeBlocks { + pub fn as_unsafe_blocks(&mut self) -> UnsafeBlocks<'_> { UnsafeBlocks { - inner: UnsafeCell::new(&mut *self.blocks), - lifetime: PhantomData, + ptr: &mut *self.blocks, + phantom: PhantomData, } } } @@ -51,8 +50,8 @@ impl Memory { /// /// All operations are unchecked and require `unsafe`. pub struct UnsafeBlocks<'a> { - inner: UnsafeCell<*mut [Block]>, - lifetime: PhantomData<&'a mut [Block]>, + ptr: *mut [Block], + phantom: PhantomData<&'a [Block]>, } impl UnsafeBlocks<'_> { @@ -65,8 +64,7 @@ impl UnsafeBlocks<'_> { /// lives. #[cfg(feature = "crossbeam-utils")] pub unsafe fn get_unchecked(&self, index: usize) -> &Block { - // SAFETY: by construction, the pointer to the slice of blocks is dereferenceable. - let first_block: *const Block = unsafe { *self.inner.get() } as _; + let first_block: *const Block = self.ptr as _; // SAFETY: the caller promises that the `index` is in bounds; therefore, we're within // the bounds of the allocated object, and the offset in bytes fits in an `isize`. let ptr = unsafe { first_block.add(index) }; @@ -81,10 +79,9 @@ impl UnsafeBlocks<'_> { /// /// The caller must ensure that `index` is in bounds, no other references exist or are created /// to the corresponding block, and no data races happen while the returned reference lives. - #[allow(clippy::mut_from_ref)] // TODO: check that this really is a false positive (Mutex?) + #[allow(clippy::mut_from_ref)] pub unsafe fn get_mut_unchecked(&self, index: usize) -> &mut Block { - // SAFETY: by construction, the pointer to the slice of blocks is dereferenceable. - let first_block: *mut Block = unsafe { *self.inner.get() } as _; + let first_block: *mut Block = self.ptr as _; // SAFETY: the caller promises that the `index` is in bounds; therefore, we're within // the bounds of the allocated object, and the offset in bytes fits in an `isize`. 
let ptr = unsafe { first_block.add(index) }; From 9139e1a8d904944ecdf09090d4fa2d688b733ed3 Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Thu, 9 Jan 2025 21:46:04 -0300 Subject: [PATCH 07/13] Move the safety frontier as needed and improve the safety arguments --- src/argon2.rs | 3 ++- src/core.rs | 65 ++++++++++++++++++++++++++++++++++++--------------- src/memory.rs | 8 ++++--- 3 files changed, 53 insertions(+), 23 deletions(-) diff --git a/src/argon2.rs b/src/argon2.rs index 9a3fedb..85d4a67 100644 --- a/src/argon2.rs +++ b/src/argon2.rs @@ -228,7 +228,8 @@ pub fn verify_raw(pwd: &[u8], salt: &[u8], hash: &[u8], config: &Config) -> Resu fn run(context: &Context) -> Vec { let mut memory = Memory::new(context.config.lanes, context.lane_length); core::initialize(context, &mut memory); - core::fill_memory_blocks(context, &mut memory); + // SAFETY: `memory` is consistent with `context` by construction. + unsafe { core::fill_memory_blocks(context, &mut memory) }; core::finalize(context, &memory) } diff --git a/src/core.rs b/src/core.rs index a6a7d01..1b99021 100644 --- a/src/core.rs +++ b/src/core.rs @@ -33,11 +33,17 @@ pub fn initialize(context: &Context, memory: &mut Memory) { } /// Fills all the memory blocks. -pub fn fill_memory_blocks(context: &Context, memory: &mut Memory) { +/// +/// # Safety +/// +/// The provided `context` and `memory` arguments must be consistent. +pub unsafe fn fill_memory_blocks(context: &Context, memory: &mut Memory) { if context.config.uses_sequential() { - fill_memory_blocks_st(context, memory); + // SAFETY: adhering to the safety contract is delegated to the caller. + unsafe { fill_memory_blocks_st(context, memory) }; } else { - fill_memory_blocks_mt(context, memory); + // SAFETY: adhering to the safety contract is delegated to the caller. 
+ unsafe { fill_memory_blocks_mt(context, memory) }; } } @@ -185,8 +191,11 @@ fn fill_first_blocks(context: &Context, memory: &mut Memory, h0: &mut [u8]) { } } +/// # Safety +/// +/// The provided `context` and `memory` arguments must be consistent. #[cfg(feature = "crossbeam-utils")] -fn fill_memory_blocks_mt(context: &Context, memory: &mut Memory) { +unsafe fn fill_memory_blocks_mt(context: &Context, memory: &mut Memory) { let mem = &memory.as_unsafe_blocks(); for p in 0..context.config.time_cost { for s in 0..common::SYNC_POINTS { @@ -211,11 +220,14 @@ fn fill_memory_blocks_mt(context: &Context, memory: &mut Memory) { } #[cfg(not(feature = "crossbeam-utils"))] -fn fill_memory_blocks_mt(_: &Context, _: &mut Memory) { +unsafe fn fill_memory_blocks_mt(_: &Context, _: &mut Memory) { unimplemented!() } -fn fill_memory_blocks_st(context: &Context, memory: &mut Memory) { +/// # Safety +/// +/// The provided `context` and `memory` arguments must be consistent. +unsafe fn fill_memory_blocks_st(context: &Context, memory: &mut Memory) { for p in 0..context.config.time_cost { for s in 0..common::SYNC_POINTS { for l in 0..context.config.lanes { @@ -236,9 +248,12 @@ fn fill_memory_blocks_st(context: &Context, memory: &mut Memory) { /// # Safety /// /// The memory must be processed slicewise, and with this function being called exactly once per -/// segment. If segments are filled in parallel, synchronization points are required between -/// slices. Finally, the supplied arguments must be consistent, otherwise offsets may be computed -/// incorrectly, leading to memory bugs. +/// segment, where a segment is the intersection between the slice being processed and a lane. +/// +/// If segments are filled in parallel, synchronization points are also required between slices. +/// +/// Finally, the provided `context` and `memory` arguments must be consistent, and `position` must +/// reflect the correct current lane and slice. 
unsafe fn fill_segment(context: &Context, position: &Position, memory: &UnsafeBlocks) { let mut position = position.clone(); let data_independent_addressing = (context.config.variant == Variant::Argon2i) @@ -286,10 +301,6 @@ unsafe fn fill_segment(context: &Context, position: &Position, memory: &UnsafeBl prev_offset = curr_offset - 1; } - // Ensure that offsets are in bounds for later `unsafe` operations. - assert!(curr_offset < context.memory_blocks); - assert!(prev_offset < context.memory_blocks); - // 1.2 Computing the index of the reference block // 1.2.1 Taking pseudo-random value from the previous block if data_independent_addressing { @@ -298,9 +309,10 @@ unsafe fn fill_segment(context: &Context, position: &Position, memory: &UnsafeBl } pseudo_rand = address_block[(i % common::ADDRESSES_IN_BLOCK) as usize]; } else { - // SAFETY: `prev_offset` has been asserted to be in bounds, and the slicewise - // computation of Argon2 means that the referenced block shouldn't be mutably aliased - // or subject to a data race. + assert!(prev_offset < context.memory_blocks); + assert!(prev_offset / context.lane_length == position.lane); + // SAFETY: `prev_offset` is in bounds and on this lane, so the block isn't mutably + // aliased or mutated from another thread (in the current slice). pseudo_rand = unsafe { memory.get_unchecked(prev_offset as usize) }[0]; } @@ -320,10 +332,25 @@ unsafe fn fill_segment(context: &Context, position: &Position, memory: &UnsafeBl // 2 Creating a new block let index = context.lane_length as u64 * ref_lane + ref_index as u64; + assert!(curr_offset < context.memory_blocks); + assert!(prev_offset < context.memory_blocks); assert!(index < context.memory_blocks as u64); - // SAFETY: `curr_offset`, `prev_offset` and `index` have been asserted to be in bounds and - // the slicewise computation of Argon2 means that there should be no mutable aliasing or - // data races. 
+ assert!(curr_offset != prev_offset && (curr_offset as u64) != index); + assert!(prev_offset / context.lane_length == position.lane); + assert!( + index / (context.lane_length as u64) == position.lane as u64 + || index % (context.lane_length as u64) / (context.segment_length as u64) + != position.slice as u64 + ); + // SAFETY: + // - `curr_offset`, `prev_offset` and `index` are in bounds and refer to different blocks; + // - `curr_offset` is only accessed by this thread (in the current slice), and there are no + // other references to the corresponding block; + // - `prev_offset` is on the same lane as `curr_offset`, and therefore the corresponding + // block isn't mutably aliased or mutated from another thread (in the current slice); + // - `index` is either on the same lane or on a different slice, and in both cases it + // cannot be mutably aliased or mutated from another thread (during the processing of the + // current slice). let curr_block = unsafe { memory.get_mut_unchecked(curr_offset as usize) }; let prev_block = unsafe { memory.get_unchecked(prev_offset as usize) }; let ref_block = unsafe { memory.get_unchecked(index as usize) }; diff --git a/src/memory.rs b/src/memory.rs index 298f31e..9a8d444 100644 --- a/src/memory.rs +++ b/src/memory.rs @@ -36,7 +36,7 @@ impl Memory { Memory { rows, cols, blocks } } - /// Return a wrapped pointer to the flat array of blocks for parallel disjoint access. + /// Returns a pointer to the flat array of blocks useful for parallel disjoint access. #[cfg(feature = "crossbeam-utils")] pub fn as_unsafe_blocks(&mut self) -> UnsafeBlocks<'_> { UnsafeBlocks { @@ -46,9 +46,11 @@ impl Memory { } } -/// Wrapped pointer to the flat array of blocks for parallel disjoint access. +/// A wrapped raw pointer to the flat array of blocks, useful for parallel disjoint access. /// -/// All operations are unchecked and require `unsafe`. +/// All operations on this type are unchecked and require `unsafe`. 
The caller is responsible for +/// accessing the blocks in a manner that follows the aliasing rules and that doesn't result in +/// data races. pub struct UnsafeBlocks<'a> { ptr: *mut [Block], phantom: PhantomData<&'a [Block]>, From 4148257c4016c3e2fdbc14fcdafe02581f82cb29 Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Fri, 10 Jan 2025 01:33:43 -0300 Subject: [PATCH 08/13] Replace *mut [Block] with *mut Block and then NonNull MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also review that an UnsafeCell isn't necessary. The Glossary of the Unsafe Code Guidelines Reference explicitly states: > Finding live shared references propagates recursively through > references, but not through raw pointers. [...] If data immediately > pointed to by a *const T or &*const T is mutated, that's *not* interior > mutability. — https://rust-lang.github.io/unsafe-code-guidelines/glossary.html#interior-mutability The Reference also only mentions transitivity through references and boxes: > Moreover, the bytes pointed to by a shared reference, including > transitively through other references (both shared and mutable) and > Boxes, are immutable; transitivity includes those references stored in > fields of compound types. — https://doc.rust-lang.org/stable/reference/behavior-considered-undefined.html?highlight=undefin#r-undefined.immutable --- src/memory.rs | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/memory.rs b/src/memory.rs index 9a8d444..78fa8ac 100644 --- a/src/memory.rs +++ b/src/memory.rs @@ -13,6 +13,7 @@ use std::fmt; use std::fmt::Debug; use std::marker::PhantomData; use std::ops::{Index, IndexMut}; +use std::ptr::NonNull; /// Structure representing the memory matrix. 
pub struct Memory { @@ -40,7 +41,7 @@ impl Memory { #[cfg(feature = "crossbeam-utils")] pub fn as_unsafe_blocks(&mut self) -> UnsafeBlocks<'_> { UnsafeBlocks { - ptr: &mut *self.blocks, + blocks: NonNull::new(self.blocks.as_mut_ptr()).unwrap(), phantom: PhantomData, } } @@ -48,11 +49,10 @@ impl Memory { /// A wrapped raw pointer to the flat array of blocks, useful for parallel disjoint access. /// -/// All operations on this type are unchecked and require `unsafe`. The caller is responsible for -/// accessing the blocks in a manner that follows the aliasing rules and that doesn't result in -/// data races. +/// All operations on this type are unchecked and require `unsafe`. Callers are responsible for +/// accessing the blocks without violating the aliasing rules or resulting in data races. pub struct UnsafeBlocks<'a> { - ptr: *mut [Block], + blocks: NonNull, phantom: PhantomData<&'a [Block]>, } @@ -66,13 +66,12 @@ impl UnsafeBlocks<'_> { /// lives. #[cfg(feature = "crossbeam-utils")] pub unsafe fn get_unchecked(&self, index: usize) -> &Block { - let first_block: *const Block = self.ptr as _; - // SAFETY: the caller promises that the `index` is in bounds; therefore, we're within - // the bounds of the allocated object, and the offset in bytes fits in an `isize`. - let ptr = unsafe { first_block.add(index) }; - // SAFETY: the caller promises that there are no mutable references or data races to - // mutate the requested `Block`. - unsafe { &*ptr } + // SAFETY: the caller promises that the `index` is in bounds; therefore, we're within the + // bounds of the allocated object, and the offset in bytes fits in an `isize`. + let ptr = unsafe { self.blocks.add(index) }; + // SAFETY: the caller promises that there are no mutable references to the requested + // `Block` or data races; and `ptr` points to a valid and aligned `Block`. + unsafe { ptr.as_ref() } } /// Get a mutable reference to the `Block` at `index`. 
@@ -83,19 +82,17 @@ impl UnsafeBlocks<'_> { /// to the corresponding block, and no data races happen while the returned reference lives. #[allow(clippy::mut_from_ref)] pub unsafe fn get_mut_unchecked(&self, index: usize) -> &mut Block { - let first_block: *mut Block = self.ptr as _; // SAFETY: the caller promises that the `index` is in bounds; therefore, we're within // the bounds of the allocated object, and the offset in bytes fits in an `isize`. - let ptr = unsafe { first_block.add(index) }; - // SAFETY: the caller promises that there are no other references or accesses, nor data - races, that may access the requested `Block`. - unsafe { &mut *ptr } + let mut ptr = unsafe { self.blocks.add(index) }; + // SAFETY: the caller promises that there are no other references, accesses, or data races + // affecting the target `Block`; and `ptr` points to a valid and aligned `Block`. + unsafe { ptr.as_mut() } } } -// SAFETY: passing or sharing `UnsafeBlocks` accross threads is, in itself, safe. Using it isn't, -// as the user must ensure that there are no data races or mutable aliasing, but all of its methods -// already require `unsafe`. +// SAFETY: passing or sharing an `UnsafeBlocks` across threads is, in itself, safe (calling any +// methods on it isn't, but they already require `unsafe`).
unsafe impl Send for UnsafeBlocks<'_> {} unsafe impl Sync for UnsafeBlocks<'_> {} From ba5d11e9ac4e2e2d1f5fb6fcd42833415d5a1661 Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Fri, 10 Jan 2025 01:40:56 -0300 Subject: [PATCH 09/13] Review safety arguments once again --- src/argon2.rs | 3 +-- src/core.rs | 33 ++++++++++++++++++--------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/argon2.rs b/src/argon2.rs index 85d4a67..8789a08 100644 --- a/src/argon2.rs +++ b/src/argon2.rs @@ -202,7 +202,6 @@ pub fn verify_encoded_ext(encoded: &str, pwd: &[u8], secret: &[u8], ad: &[u8]) - /// /// # Examples /// -/// /// ``` /// use argon2::{self, Config}; /// @@ -228,7 +227,7 @@ pub fn verify_raw(pwd: &[u8], salt: &[u8], hash: &[u8], config: &Config) -> Resu fn run(context: &Context) -> Vec { let mut memory = Memory::new(context.config.lanes, context.lane_length); core::initialize(context, &mut memory); - // SAFETY: `memory` is consistent with `context` by construction. + // SAFETY: `memory` is constructed from `context`. unsafe { core::fill_memory_blocks(context, &mut memory) }; core::finalize(context, &memory) } diff --git a/src/core.rs b/src/core.rs index 1b99021..1c99830 100644 --- a/src/core.rs +++ b/src/core.rs @@ -208,9 +208,11 @@ unsafe fn fill_memory_blocks_mt(context: &Context, memory: &mut Memory) { index: 0, }; scoped.spawn(move |_| { - // SAFETY: segments are processed slicewise and then each passed to a - // different thread. The threads synchronize when the `scope` is dropped. - // The arguments are correct by construction. + // SAFETY: segments are processed slicewise and then each is handed + // (exactly once) to a worker thread. The threads synchronize when the + // `scope` is dropped, before the next slice is processed. The caller + // promises that `context` and `memory` are consistent, and `position` + // reflects the current lane and slice. 
unsafe { fill_segment(context, &position, mem) }; }); } @@ -237,8 +239,9 @@ unsafe fn fill_memory_blocks_st(context: &Context, memory: &mut Memory) { slice: s, index: 0, }; - // SAFETY: segments are processed slicewise and then sequentially, and the - // arguments are correct by construction. + // SAFETY: segments are processed slicewise and then sequentially, the caller + // promises that `context` and `memory` are consistent, and `position` reflects the + // current lane and slice. unsafe { fill_segment(context, &position, &memory.as_unsafe_blocks()) }; } } @@ -247,10 +250,11 @@ unsafe fn fill_memory_blocks_st(context: &Context, memory: &mut Memory) { /// # Safety /// -/// The memory must be processed slicewise, and with this function being called exactly once per +/// The memory must be processed slicewise, and this function must be called exactly once per /// segment, where a segment is the intersection between the slice being processed and a lane. +/// That is, within a slice, this function is called exactly once per lane. /// -/// If segments are filled in parallel, synchronization points are also required between slices. +/// If segments are filled in parallel, synchronization points are required between slices. /// /// Finally, the provided `context` and `memory` arguments must be consistent, and `position` must /// reflect the correct current lane and slice. 
@@ -342,15 +346,14 @@ unsafe fn fill_segment(context: &Context, position: &Position, memory: &UnsafeBl || index % (context.lane_length as u64) / (context.segment_length as u64) != position.slice as u64 ); - // SAFETY: - // - `curr_offset`, `prev_offset` and `index` are in bounds and refer to different blocks; - // - `curr_offset` is only accessed by this thread (in the current slice), and there are no - // other references to the corresponding block; + // SAFETY: `curr_offset`, `prev_offset` and `index` are in bounds and refer to different + // blocks; and during the processing of the current slice: + // - `curr_offset` is only accessed by this thread, and there are no other references to + // the corresponding block; // - `prev_offset` is on the same lane as `curr_offset`, and therefore the corresponding - // block isn't mutably aliased or mutated from another thread (in the current slice); - // - `index` is either on the same lane or on a different slice, and in both cases it - // cannot be mutably aliased or mutated from another thread (during the processing of the - // current slice). + // block isn't mutably aliased or mutated from another thread; + // - `index` is either on the same lane or on a different slice, and in both cases it will + // not be mutably aliased or mutated from another thread. let curr_block = unsafe { memory.get_mut_unchecked(curr_offset as usize) }; let prev_block = unsafe { memory.get_unchecked(prev_offset as usize) }; let ref_block = unsafe { memory.get_unchecked(index as usize) }; From 16e47e634ec4c2e676871d4aac0d452c43bd2622 Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Fri, 10 Jan 2025 03:20:40 -0300 Subject: [PATCH 10/13] Add tests to run in Miri These have higher lane counts (as with parallelism enabled that may surface more bugs) but very little memory, since running in Miri is quite slow. 
--- src/argon2.rs | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/argon2.rs b/src/argon2.rs index 8789a08..b93ea0d 100644 --- a/src/argon2.rs +++ b/src/argon2.rs @@ -238,13 +238,41 @@ mod tests { #[test] fn single_thread_verification_multi_lane_hash() { - /* - let hash = hash_encoded(b"foo", b"abcdefghijklmnopqrstuvwxyz", &Config { - lanes: 4, thread_mode: ThreadMode::Parallel, - ..Config::default() - }); - */ - let hash = "$argon2i$v=19$m=4096,t=3,p=4$YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo$BvBk2OaSofBHfbrUW61nHrWB/43xgfs/QJJ5DkMAd8I"; - verify_encoded(hash, b"foo").unwrap(); + let hash = "$argon2i$v=19$m=4096,t=3,p=4$YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo$\ + BvBk2OaSofBHfbrUW61nHrWB/43xgfs/QJJ5DkMAd8I"; + let res = verify_encoded(hash, b"foo").unwrap(); + assert!(res); + } + + #[test] + fn test_argon2id_for_miri() { + let hash = "$argon2id$v=19$m=256,t=2,p=16$c29tZXNhbHQ$\ + 0gasyPnKXiBHQ5bft/bd4jrmy2DdtrLTX3JR9co7fRY"; + let res = verify_encoded(hash, b"password").unwrap(); + assert!(res); + } + + #[test] + fn test_argon2id_for_miri_2() { + let hash = "$argon2id$v=19$m=512,t=2,p=8$c29tZXNhbHQ$\ + qgW4yz2jO7oklapDpVwzUYgfDLzfwkppGTvhRDDBjkY"; + let res = verify_encoded(hash, b"password").unwrap(); + assert!(res); + } + + #[test] + fn test_argon2d_for_miri() { + let hash = "$argon2d$v=19$m=256,t=2,p=16$c29tZXNhbHQ$\ + doW5kZ/0cTwqwbYTwr9JD0wNwy3tMyJMMk9ojGsC8bk"; + let res = verify_encoded(hash, b"password").unwrap(); + assert!(res); + } + + #[test] + fn test_argon2i_for_miri() { + let hash = "$argon2i$v=19$m=256,t=2,p=16$c29tZXNhbHQ$\ + c1suSp12ZBNLSuyhD8pJriM2r5jP2kgZ5QdDAk3+HaY"; + let res = verify_encoded(hash, b"password").unwrap(); + assert!(res); } } From 17a500de2d1f96bbc850de3bb71aa000a21f15ff Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Fri, 10 Jan 2025 16:12:06 -0300 Subject: [PATCH 11/13] Don't feature gate `UnsafeBlocks` since it's always used --- src/memory.rs | 2 -- 1 file changed, 2 
deletions(-) diff --git a/src/memory.rs b/src/memory.rs index 78fa8ac..a368db1 100644 --- a/src/memory.rs +++ b/src/memory.rs @@ -38,7 +38,6 @@ impl Memory { } /// Returns a pointer to the flat array of blocks useful for parallel disjoint access. - #[cfg(feature = "crossbeam-utils")] pub fn as_unsafe_blocks(&mut self) -> UnsafeBlocks<'_> { UnsafeBlocks { blocks: NonNull::new(self.blocks.as_mut_ptr()).unwrap(), @@ -64,7 +63,6 @@ impl UnsafeBlocks<'_> { /// The caller must ensure that `index` is in bounds, no mutable references exist or are /// created to the corresponding block, and no data races happen while the returned reference /// lives. - #[cfg(feature = "crossbeam-utils")] pub unsafe fn get_unchecked(&self, index: usize) -> &Block { // SAFETY: the caller promises that the `index` is in bounds; therefore, we're within the // bounds of the allocated object, and the offset in bytes fits in an `isize`. From c192626600ad7e5f690fc57b8c6022b466ceaa39 Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Fri, 10 Jan 2025 16:13:38 -0300 Subject: [PATCH 12/13] Add note on UnsafeBlocks::get_mut_unchecked not being interior mutability --- src/memory.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/memory.rs b/src/memory.rs index a368db1..fdfb132 100644 --- a/src/memory.rs +++ b/src/memory.rs @@ -85,6 +85,14 @@ impl UnsafeBlocks<'_> { let mut ptr = unsafe { self.blocks.add(index) }; // SAFETY: the caller promises that there are no other references, accesses, or data races // affecting the target `Block`; and `ptr` points to a valid and aligned `Block`. + // + // Also note that this isn't interior mutability, as we're going through a `NonNull` + // pointer derived from a mutable reference (and interior mutability is not transitive + // through raw pointers[1][2][3]). 
+ // + // [1]: https://rust-lang.github.io/unsafe-code-guidelines/glossary.html#interior-mutability + // [2]: https://doc.rust-lang.org/stable/reference/behavior-considered-undefined.html?highlight=undefin#r-undefined.immutable + // [3]: https://github.com/rust-lang/reference/issues/1227 unsafe { ptr.as_mut() } } } From 8e636ab902e4e82f080c570a8be1161e04253cbd Mon Sep 17 00:00:00 2001 From: Jonas Malaco Date: Fri, 10 Jan 2025 17:49:06 -0300 Subject: [PATCH 13/13] Prevent false sharing of blocks This can yield small performance improvements in some cases (e.g. a 2% improvement computing Argon2id in parallel with p=4, m=64MiB and t=1). --- src/block.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/block.rs b/src/block.rs index b0573be..2c72cda 100644 --- a/src/block.rs +++ b/src/block.rs @@ -11,7 +11,13 @@ use std::fmt; use std::fmt::Debug; use std::ops::{BitXorAssign, Index, IndexMut}; -/// Structure for the (1KB) memory block implemented as 128 64-bit words. +/// Structure for the (1 KiB) memory block implemented as 128 64-bit words. +// Blocks are 128-byte aligned to prevent false sharing. This specific alignment value replicates +// what `crossbeam-utils::CachePadded` does on modern architectures, which either use 128-byte +// cache lines (aarch64) or pull 64-byte cache lines in pairs (x86-64), without the need for +// stubbing that type in when `crossbeam-utils` isn't available. As blocks are fairly large (1 +// KiB), this simplification shouldn't severely affect the (rarer) targets with smaller cache lines. +#[repr(align(128))] pub struct Block([u64; common::QWORDS_IN_BLOCK]); impl Block {