From 0d073431c09116f0572152caffbfcb5f8f4a04a4 Mon Sep 17 00:00:00 2001 From: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> Date: Tue, 27 Aug 2024 23:18:46 -0400 Subject: [PATCH] feat: add benchmarks for the IndexedCrate::new method (#403) (#408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently uses `criterion` for benchmarks, but another option would be `iai-callgrind` which provides "oneshot" benchmarks (which are way faster and more reliable in busy systems like CI), but AFAIK `iai-callgrind` only supports Linux. Also adds a script to download aws-sdk-ec2 and build the rustdoc JSON. It should work on MacOS, but I don't have a way to test that. It also relies on `rustup` (because we use `cargo +nightly`). Current numbers on my system: ``` IndexedCrate/new(aws-sdk-ec2) time: [1.5561 s 1.5609 s 1.5656 s] ``` Co-authored-by: Jalil David Salamé Messina <60845989+jalil-salame@users.noreply.github.com> --- .gitignore | 5 + Cargo.lock | 379 ++++++++++++++++++++++++++++++ Cargo.toml | 8 + benches/indexed_crate.rs | 29 +++ scripts/prepare_benchmark_data.sh | 54 +++++ 5 files changed, 475 insertions(+) create mode 100644 benches/indexed_crate.rs create mode 100755 scripts/prepare_benchmark_data.sh diff --git a/.gitignore b/.gitignore index 48db3abb..4f369ae4 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,8 @@ test_crates/**/Cargo.lock # ide .idea + +# Benchmarks and profiling related data +flamegraph.svg +perf.data +perf.data.old diff --git a/Cargo.lock b/Cargo.lock index d0e272b5..70c8513e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + [[package]] name = "anyhow" version = "1.0.71" @@ -41,6 +53,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + [[package]] name = "bstr" version = "0.2.17" @@ -52,6 +70,12 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + [[package]] name = "bytes" version = "1.4.0" @@ -61,6 +85,70 @@ dependencies = [ "serde", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + [[package]] name = "console" version = "0.15.7" @@ -73,6 +161,73 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "either" version = "1.8.1" @@ -91,12 +246,28 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + [[package]] name = "hashbrown" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "indexmap" version = "2.1.0" @@ -108,6 +279,26 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.11.0" @@ -132,6 +323,15 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a" +[[package]] +name = "js-sys" +version = "0.3.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -144,6 +344,12 @@ version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + [[package]] name = "maplit" version = "1.0.2" @@ -156,6 +362,27 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + [[package]] name = "pest" version = "2.7.5" @@ -167,6 +394,34 @@ dependencies = [ "ucd-trie", ] +[[package]] +name = "plotters" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + 
+[[package]] +name = "plotters-backend" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" + +[[package]] +name = "plotters-svg" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" +dependencies = [ + "plotters-backend", +] + [[package]] name = "proc-macro2" version = "1.0.64" @@ -185,6 +440,26 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "regex" version = "1.9.1" @@ -235,6 +510,15 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = "1.0.185" @@ -327,6 +611,16 @@ dependencies = [ "syn", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "trustfall" version = "0.7.1" @@ -343,6 +637,7 @@ name = "trustfall-rustdoc-adapter" version = "29.1.3" 
dependencies = [
  "anyhow",
+ "criterion",
  "itertools 0.12.1",
  "maplit",
  "rustdoc-types",
@@ -405,6 +700,90 @@ version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
 
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.93"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.93"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.93"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.93"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.93"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484"
+
+[[package]]
+name = "web-sys"
+version = "0.3.70"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
+dependencies = [
+ "windows-sys",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.45.0"
diff --git a/Cargo.toml b/Cargo.toml
index 922cb351..5dc58054 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,8 +14,16 @@ readme = "./README.md"
 trustfall = "0.7.1"
 rustdoc-types = "0.25.0"
 
+[[bench]]
+name = "indexed_crate"
+harness = false
+
+[profile.bench]
+debug = 1 # We only need function name information (for flamegraphs)
+
 [dev-dependencies]
 anyhow = "1.0.58"
+criterion = "0.5.1"
 itertools = "0.12.0"
 serde_json = "1.0.85"
 serde = { version = "1.0.185", features = ["derive"] }
diff --git a/benches/indexed_crate.rs b/benches/indexed_crate.rs
new file mode 100644
index 00000000..79021e14
--- /dev/null
+++ b/benches/indexed_crate.rs
@@ -0,0 +1,29 @@
+use std::sync::OnceLock;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use rustdoc_types::Crate;
+use trustfall_rustdoc_adapter::IndexedCrate;
+
+/// Benchmark [`IndexedCrate::new`] with the aws-sdk-ec2 crate as an input
+fn new(c: &mut Criterion) {
+    let mut group = c.benchmark_group("IndexedCrate");
+    let crate_ = get_aws_sdk_crate(); // load and parse the JSON once, before the measured closure
+    group.bench_function("new(aws-sdk-ec2)", |b| {
+        b.iter_with_large_drop(|| IndexedCrate::new(crate_))
+    });
+    group.finish();
+}
+
+static AWS_SDK_EC2_CRATE: OnceLock<Crate> = OnceLock::new(); // filled on first call to `get_aws_sdk_crate`
+
+fn get_aws_sdk_crate() -> &'static Crate { // panics if the JSON file is missing or cannot be parsed
+    AWS_SDK_EC2_CRATE.get_or_init(|| {
+        let data = std::fs::read_to_string("localdata/benches/aws-sdk-ec2.json")
+            .expect("failed to read the rustdoc JSON. Did you forget to run `scripts/prepare_benchmark_data.sh`?");
+        serde_json::from_str(data.as_str()).expect("localdata/benches/aws-sdk-ec2.json appears to contain invalid JSON")
+    })
+}
+
+criterion_group!(benches, new);
+
+criterion_main!(benches);
diff --git a/scripts/prepare_benchmark_data.sh b/scripts/prepare_benchmark_data.sh
new file mode 100755
index 00000000..05558005
--- /dev/null
+++ b/scripts/prepare_benchmark_data.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+
+aws_sdk_version="${AWS_SDK_VERSION:-release-2024-08-22}"
+
+bail() { # print "ERROR: <message>" and exit with status 1
+    echo "ERROR: $1" >&2 # diagnostics belong on stderr, not mixed into stdout
+    exit 1
+}
+
+repo_path="$(realpath "$(dirname "$0")")"/..
+bench_data_path="${repo_path}/localdata/benches/aws-sdk-ec2.json"
+
+mkdir -p "${repo_path}/localdata/benches" || bail "failed to create ${repo_path}/localdata/benches"
+
+if [ -f "$bench_data_path" ]; then
+    echo "Reusing bench input data: already present at $bench_data_path"
+    echo "help: delete the file if you want to regenerate it"
+    echo " rm -v '$bench_data_path'"
+    exit 0
+fi
+
+# FIXME: This is a Linux specific path, on MacOS it should instead point to $HOME/Library/Caches/
+# see: https://docs.rs/directories/latest/directories/struct.ProjectDirs.html#method.cache_dir
+cache_path="${XDG_CACHE_HOME:-$HOME/.cache}"
+download_url="https://github.com/awslabs/aws-sdk-rust/archive/refs/tags/${aws_sdk_version}.tar.gz"
+
+source_tarball="${cache_path}/aws-sdk-rust-${aws_sdk_version}.tar.gz"
+
+if [ -f "$source_tarball" ]; then
+    echo "Reusing source tarball: already present at $source_tarball"
+    echo "help: delete the file if you want to download it again"
+    echo " rm -v '$source_tarball'"
+else
+    echo "Downloading sources..."
+    curl --proto '=https' --tlsv1.2 -SfL --create-dirs "$download_url" -o "$source_tarball" || { rm -f "$source_tarball"; bail "failed to download aws-sdk-rust sources"; } # never keep a truncated tarball: the -f check above would reuse it
+fi
+
+source_dir="${cache_path}/aws-sdk-rust-${aws_sdk_version}"
+
+if [ -d "$source_dir" ]; then
+    echo "Reusing extracted sources: already present at $source_dir"
+    echo "help: delete the directory if you want to extract the source tarball again"
+    echo " rm -rv '$source_dir'"
+else
+    echo "Extracting sources..."
+    tar -x -f "${source_tarball}" -C "${cache_path}" || { rm -rf "$source_dir"; bail "failed to extract aws-sdk-rust sources"; } # drop a partial tree so the -d check above does not reuse it
+fi
+
+cd "${source_dir}/sdk/ec2" || bail "failed to cd into the extracted sources"
+
+echo "Generating rustdoc JSON..."
+RUSTDOCFLAGS="-Z unstable-options --document-private-items --document-hidden-items --output-format=json --cap-lints=allow" cargo +nightly doc --lib --no-deps || bail "failed to generate rustdoc JSON"
+
+cp -v "$source_dir/target/doc/aws_sdk_ec2.json" "$bench_data_path" || bail "failed to copy the rustdoc JSON to $bench_data_path"