diff --git a/Cargo.toml b/Cargo.toml index c76e7ac..3eac055 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ rust_decimal_macros = "1.34" fast-float = "0.2" rustc-hash = { version = "1.0"} memmap2 = {version = "0.9.4"} +memchr = { version = "2", default-features = false } diff --git a/flamegraphs/09-use-memchr/flamegraph.svg b/flamegraphs/09-use-memchr/flamegraph.svg new file mode 100644 index 0000000..d30a6a5 --- /dev/null +++ b/flamegraphs/09-use-memchr/flamegraph.svg @@ -0,0 +1,491 @@ +Flame Graph Reset ZoomSearch [ld-linux-x86-64.so.2] (7 samples, 0.02%)[unknown] (4 samples, 0.01%)[ld-linux-x86-64.so.2] (8 samples, 0.03%)<core::ops::range::Range<usize> as core::slice::index::SliceIndex<[T]>>::index (200 samples, 0.67%)core::slice::index::<impl core::ops::index::Index<I> for [T]>::index (805 samples, 2.70%)co..<core::ops::range::RangeFrom<usize> as core::slice::index::SliceIndex<[T]>>::index (605 samples, 2.03%)<..<core::ops::range::RangeFrom<usize> as core::slice::index::SliceIndex<[T]>>::get_unchecked (263 samples, 0.88%)<core::ops::range::Range<usize> as core::slice::index::SliceIndex<[T]>>::get_unchecked (263 samples, 0.88%)core::ptr::const_ptr::<impl *const T>::add (263 samples, 0.88%)<f32 as core::ops::arith::Div>::div (3,889 samples, 13.05%)<f32 as core::ops::a..<f32 as fast_float::float::Float>::from_u64 (41 samples, 0.14%)<f32 as fast_float::float::Float>::pow10_fast_path (121 samples, 0.41%)fast_float::number::Number::try_fast_path (4,816 samples, 16.16%)fast_float::number::Numbe..fast_float::number::Number::is_fast_path (201 samples, 0.67%)fast_float::common::AsciiStr::check_first (121 samples, 0.41%)fast_float::common::AsciiStr::is_empty (107 samples, 0.36%)fast_float::common::AsciiStr::check_first_either (122 samples, 0.41%)fast_float::common::AsciiStr::first (204 samples, 0.68%)fast_float::common::AsciiStr::offset_from (58 samples, 0.19%)core::num::<impl isize>::wrapping_sub (58 samples, 0.19%)fast_float::number::try_parse_8digits_le (319 samples, 1.07%)fast_float::common::AsciiStr::try_read_u64 (319 samples, 1.07%)fast_float::common::AsciiStr::check_len (252 samples, 0.85%)core::ptr::const_ptr::<impl *const T>::add (163 samples, 0.55%)core::num::<impl u8>::is_ascii_digit (431 samples, 1.45%)fast_float::common::AsciiStr::first (4 samples, 0.01%)fast_float::common::AsciiStr::is_empty (407 samples, 1.37%)fast_float::common::AsciiStr::step (57 samples, 0.19%)fast_float::common::AsciiStr::step_by (57 samples, 0.19%)core::ptr::const_ptr::<impl *const T>::add (57 samples, 0.19%)fast_float::parse (8,159 samples, 27.37%)fast_float::parsefast_float::FastFloat::parse_float (8,159 samples, 27.37%)fast_float::FastFloat::parse_floatfast_float::FastFloat::parse_float_partial (8,159 samples, 27.37%)fast_float::FastFloat::parse_float_partialfast_float::parse::parse_float (8,159 samples, 27.37%)fast_float::parse::parse_floatfast_float::number::parse_number (3,139 samples, 10.53%)fast_float::num..fast_float::number::try_parse_digits (1,703 samples, 5.71%)fast_fl..fast_float::common::AsciiStr::parse_digits (1,703 samples, 5.71%)fast_fl..fast_float::number::try_parse_digits::_{{closure}} (201 samples, 0.67%)core::num::<impl u64>::wrapping_add (127 samples, 0.43%)<core::option::Option<T> as core::ops::try_trait::Try>::branch (218 samples, 0.73%)<*const T as memchr::ext::Pointer>::distance (7 samples, 0.02%)core::ptr::const_ptr::<impl *const T>::offset_from (7 samples, 0.02%)<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::has_non_zero (10 samples, 0.03%)[unknown] (85 samples, 0.29%)[unknown] (53 samples, 0.18%)[unknown] (49 samples, 0.16%)[unknown] (41 samples, 0.14%)[unknown] (24 samples, 0.08%)[unknown] (12 samples, 0.04%)[unknown] (10 samples, 0.03%)[unknown] (7 samples, 0.02%)[unknown] (3 samples, 0.01%)memchr::vector::x86sse2::<impl memchr::vector::Vector for core::core_arch::x86::__m128i>::cmpeq (72 samples, 0.24%)core::core_arch::x86::sse2::_mm_cmpeq_epi8 (72 samples, 0.24%)memchr::arch::generic::memchr::One<V>::search_chunk (1,151 samples, 3.86%)memc..memchr::vector::x86sse2::<impl memchr::vector::Vector for core::core_arch::x86::__m128i>::movemask (241 samples, 0.81%)core::core_arch::x86::sse2::_mm_movemask_epi8 (241 samples, 0.81%)memchr::vector::Vector::movemask_will_have_non_zero (7 samples, 0.02%)memchr::vector::x86sse2::<impl memchr::vector::Vector for core::core_arch::x86::__m128i>::movemask (7 samples, 0.02%)core::core_arch::x86::sse2::_mm_movemask_epi8 (7 samples, 0.02%)core::core_arch::x86::sse2::_mm_cmpeq_epi8 (21 samples, 0.07%)memchr::vector::x86sse2::<impl memchr::vector::Vector for core::core_arch::x86::__m128i>::cmpeq (46 samples, 0.15%)memchr::arch::x86_64::memchr::memchr_raw::find_sse2 (25 samples, 0.08%)[unknown] (6 samples, 0.02%)[unknown] (3 samples, 0.01%)[unknown] (3 samples, 0.01%)[unknown] (3 samples, 0.01%)memchr::memchr::memchr (2,815 samples, 9.44%)memchr::memchr..memchr::arch::generic::memchr::search_slice_with_raw (2,815 samples, 9.44%)memchr::arch::..memchr::memchr::memchr::_{{closure}} (2,408 samples, 8.08%)memchr::mem..memchr::memchr::memchr_raw (2,408 samples, 8.08%)memchr::mem..memchr::arch::x86_64::memchr::memchr_raw (2,408 samples, 8.08%)memchr::arc..memchr::arch::x86_64::memchr::memchr_raw::find_sse2 (1,961 samples, 6.58%)memchr::a..memchr::arch::x86_64::sse2::memchr::One::find_raw (1,767 samples, 5.93%)memchr::..memchr::arch::x86_64::sse2::memchr::One::find_raw_impl (1,262 samples, 4.23%)memch..memchr::arch::generic::memchr::One<V>::find_raw (1,262 samples, 4.23%)memch..memchr::vector::x86sse2::<impl memchr::vector::Vector for core::core_arch::x86::__m128i>::or (9 samples, 0.03%)core::core_arch::x86::sse2::_mm_or_si128 (9 samples, 0.03%)std::collections::hash::map::Entry<K,V>::and_modify (559 samples, 1.88%)s..rust_1brc::calculate_station_values::_{{closure}} (559 samples, 1.88%)r..core::hash::Hasher::write_length_prefix (11 samples, 0.04%)<rustc_hash::FxHasher as core::hash::Hasher>::write_usize (11 samples, 0.04%)rustc_hash::FxHasher::add_to_hash (11 samples, 0.04%)core::num::<impl usize>::wrapping_mul (11 samples, 0.04%)core::slice::index::<impl core::ops::index::Index<I> for [T]>::index (275 samples, 0.92%)<core::ops::range::RangeFrom<usize> as core::slice::index::SliceIndex<[T]>>::index (275 samples, 0.92%)<core::ops::range::RangeFrom<usize> as core::slice::index::SliceIndex<[T]>>::get_unchecked (275 samples, 0.92%)<core::ops::range::Range<usize> as core::slice::index::SliceIndex<[T]>>::get_unchecked (275 samples, 0.92%)core::ptr::const_ptr::<impl *const T>::add (63 samples, 0.21%)<usize as core::ops::bit::BitXor>::bitxor (55 samples, 0.18%)core::num::<impl usize>::rotate_left (1,220 samples, 4.09%)core..<rustc_hash::FxHasher as core::hash::Hasher>::write (2,458 samples, 8.25%)<rustc_hash..rustc_hash::FxHasher::add_to_hash (1,411 samples, 4.73%)rustc_..core::num::<impl usize>::wrapping_mul (136 samples, 0.46%)hashbrown::map::make_hash (2,527 samples, 8.48%)hashbrown::m..core::hash::BuildHasher::hash_one (2,527 samples, 8.48%)core::hash::..core::hash::impls::<impl core::hash::Hash for &T>::hash (2,527 samples, 8.48%)core::hash::..core::hash::impls::<impl core::hash::Hash for &T>::hash (2,527 samples, 8.48%)core::hash::..core::hash::impls::<impl core::hash::Hash for [T]>::hash (2,527 samples, 8.48%)core::hash::..core::hash::impls::<impl core::hash::Hash for u8>::hash_slice (2,516 samples, 8.44%)core::hash::..hashbrown::rustc_entry::<impl hashbrown::map::HashMap<K,V,S,A>>::rustc_entry (58 samples, 0.19%)<hashbrown::raw::bitmask::BitMaskIter as core::iter::traits::iterator::Iterator>::next (1,185 samples, 3.98%)<has..hashbrown::raw::bitmask::BitMask::lowest_set_bit (1,185 samples, 3.98%)hash..core::num::nonzero::NonZero<u16>::new (978 samples, 3.28%)cor..hashbrown::raw::RawTable<T,A>::bucket (474 samples, 1.59%)hashbrown::raw::Bucket<T>::from_base_index (474 samples, 1.59%)core::ptr::mut_ptr::<impl *mut T>::sub (474 samples, 1.59%)core::ptr::mut_ptr::<impl *mut T>::offset (474 samples, 1.59%)<[A] as core::slice::cmp::SlicePartialEq<B>>::equal (6,994 samples, 23.46%)<[A] as core::slice::cmp::SlicePartia..[libc.so.6] (4,850 samples, 16.27%)[libc.so.6]hashbrown::raw::RawTable<T,A>::find::_{{closure}} (7,492 samples, 25.13%)hashbrown::raw::RawTable<T,A>::find::_{{..hashbrown::rustc_entry::_<impl hashbrown::map::HashMap<K,V,S,A>>::rustc_entry::_{{closure}} (7,018 samples, 23.54%)hashbrown::rustc_entry::_<impl hashbr..core::cmp::impls::<impl core::cmp::PartialEq<&B> for &A>::eq (7,018 samples, 23.54%)core::cmp::impls::<impl core::cmp::Pa..core::slice::cmp::<impl core::cmp::PartialEq<[B]> for [A]>::eq (7,018 samples, 23.54%)core::slice::cmp::<impl core::cmp::Pa..hashbrown::rustc_entry::<impl hashbrown::map::HashMap<K,V,S,A>>::rustc_entry (24 samples, 0.08%)hashbrown::raw::h2 (1,217 samples, 4.08%)hash..hashbrown::raw::sse2::Group::load (331 samples, 1.11%)core::core_arch::x86::sse2::_mm_loadu_si128 (331 samples, 1.11%)hashbrown::rustc_entry::<impl hashbrown::map::HashMap<K,V,S,A>>::rustc_entry (329 samples, 1.10%)hashbrown::raw::sse2::Group::match_byte (1,938 samples, 6.50%)hashbrown..core::core_arch::x86::sse2::_mm_movemask_epi8 (1,938 samples, 6.50%)core::cor..hashbrown::raw::sse2::Group::match_empty (283 samples, 0.95%)hashbrown::raw::sse2::Group::match_byte (283 samples, 0.95%)core::core_arch::x86::sse2::_mm_movemask_epi8 (283 samples, 0.95%)hashbrown::raw::RawTableInner::find_inner (12,826 samples, 43.03%)hashbrown::raw::RawTableInner::find_innerhashbrown::rustc_entry::<impl hashbrown::map::HashMap<K,V,S,A>>::rustc_entry (32 samples, 0.11%)hashbrown::rustc_entry::<impl hashbrown::map::HashMap<K,V,S,A>>::rustc_entry (16,926 samples, 56.78%)hashbrown::rustc_entry::<impl hashbrown::map::HashMap<K,V,S,A>>::rustc_entryhashbrown::raw::RawTable<T,A>::find (13,603 samples, 45.63%)hashbrown::raw::RawTable<T,A>::findhashbrown::rustc_entry::<impl hashbrown::map::HashMap<K,V,S,A>>::rustc_entry (5 samples, 0.02%)all (29,810 samples, 100%)rust-1brc (29,810 samples, 100.00%)rust-1brc_start (29,802 samples, 99.97%)_start__libc_start_main (29,802 samples, 99.97%)__libc_start_main[libc.so.6] (29,802 samples, 99.97%)[libc.so.6]main (29,802 samples, 99.97%)mainstd::rt::lang_start_internal (29,802 samples, 99.97%)std::rt::lang_start_internalstd::rt::lang_start::_{{closure}} (29,802 samples, 99.97%)std::rt::lang_start::_{{closure}}std::sys_common::backtrace::__rust_begin_short_backtrace (29,802 samples, 99.97%)std::sys_common::backtrace::__rust_begin_short_backtracecore::ops::function::FnOnce::call_once (29,802 samples, 99.97%)core::ops::function::FnOnce::call_oncerust_1brc::main (29,802 samples, 99.97%)rust_1brc::mainrust_1brc::calculate_station_values (29,802 samples, 99.97%)rust_1brc::calculate_station_valuesstd::collections::hash::map::HashMap<K,V,S>::entry (17,262 samples, 57.91%)std::collections::hash::map::HashMap<K,V,S>::entrystd::collections::hash::map::map_entry (97 samples, 0.33%) \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 51cfa99..729b5d3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ use std::time::Instant; use fast_float; use rustc_hash::FxHashMap; use memmap2::Mmap; +use memchr::memchr; #[derive(Parser, Debug)] #[command( @@ -25,44 +26,46 @@ struct StationValues { count: u32, } -fn read_line(data: &[u8]) -> (&[u8], f32) { - let mut parts = data.rsplit(|&c| c == b';'); - let value_str = parts.next().expect("Failed to parse value string"); - let value = fast_float::parse(value_str).expect("Failed to parse value"); - let station_name = parts.next().expect("Failed to parse station name"); - (station_name, value) -} - // Calculate the station values fn calculate_station_values(data:&[u8]) -> FxHashMap<&[u8], StationValues> { let mut result: FxHashMap<&[u8], StationValues> = FxHashMap::default(); - let lines = data.split(|&c| c == b'\n'); - for line in lines { - if line.is_empty() { - continue; - } + let mut buffer = data; + loop { + match memchr(b';', &buffer) { + None => { + break; + } + Some(comma_seperator) => { + let end = memchr(b'\n', &buffer[comma_seperator..]).unwrap(); + let name = &buffer[..comma_seperator]; + let value = &buffer[comma_seperator+1..comma_seperator+end]; + let value = fast_float::parse(value).expect("Failed to parse value"); + + result + .entry(name) + .and_modify(|e| { + if value < e.min { + e.min = value; + } + if value > e.max { + e.max = value; + } + e.mean = e.mean + value; + e.count += 1; + }) + .or_insert(StationValues { + min: value, + max: value, + mean: value, + count: 1, + }); + buffer = &buffer[comma_seperator+end+1..]; + } - let (station_name, value) = read_line(line); - result - .entry(station_name) - .and_modify(|e| { - if value < e.min { - e.min = value; - } - if value > e.max { - e.max = value; - } - e.mean = e.mean + value; - e.count += 1; - }) - .or_insert(StationValues { - min: value, - max: value, - mean: value, - count: 1, - }); + } } + // Calculate the mean for all entries and round off to 1 decimal place for (_, station_values) in result.iter_mut() { station_values.mean = round_off(station_values.mean / station_values.count as f32);