Skip to content

Commit f69fa6a

Browse files
emilk and teh-cmc authored
Add memory overhead test for the datastore (#6067)
Part of #6066. Shows that the datastore currently uses 936 bytes to store a 16-byte RowId, an 8-byte TimeInt, and an 8-byte f64, i.e. around 29x the memory use it should have. ### Checklist * [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I've included a screenshot or gif (if applicable) * [x] I have tested the web demo (if applicable): * Using examples from latest `main` build: [rerun.io/viewer](https://rerun.io/viewer/pr/6067?manifest_url=https://app.rerun.io/version/main/examples_manifest.json) * Using full set of examples from `nightly` build: [rerun.io/viewer](https://rerun.io/viewer/pr/6067?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json) * [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG * [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)! - [PR Build Summary](https://build.rerun.io/pr/6067) - [Recent benchmark results](https://build.rerun.io/graphs/crates.html) - [Wasm size tracking](https://build.rerun.io/graphs/sizes.html) To run all checks from `main`, comment on the PR with `@rerun-bot full-check`. --------- Co-authored-by: Clement Rey <cr.rey.clement@gmail.com>
1 parent 72e80c6 commit f69fa6a

File tree

5 files changed

+122
-2
lines changed

5 files changed

+122
-2
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/re_data_store/Cargo.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ deadlock_detection = ["parking_lot/deadlock_detection"]
3030
# Rerun dependencies:
3131
re_format.workspace = true
3232
re_format_arrow.workspace = true
33-
re_log_types.workspace = true
3433
re_log = { workspace = true, features = ["setup"] }
34+
re_log_types.workspace = true
3535
re_tracing.workspace = true
3636
re_types_core.workspace = true
3737

@@ -50,10 +50,12 @@ web-time.workspace = true
5050

5151

5252
[dev-dependencies]
53+
re_format.workspace = true
5354
re_types = { workspace = true, features = ["datagen", "testing"] }
5455

5556
anyhow.workspace = true
5657
criterion.workspace = true
58+
insta.workspace = true
5759
mimalloc.workspace = true
5860
rand.workspace = true
5961
similar-asserts.workspace = true
+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
//! Measures the memory overhead of the data store.
2+
3+
use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
4+
5+
thread_local! {
6+
static LIVE_BYTES_IN_THREAD: AtomicUsize = AtomicUsize::new(0);
7+
}
8+
9+
pub struct TrackingAllocator {
10+
allocator: std::alloc::System,
11+
}
12+
13+
#[global_allocator]
14+
pub static GLOBAL_ALLOCATOR: TrackingAllocator = TrackingAllocator {
15+
allocator: std::alloc::System,
16+
};
17+
18+
#[allow(unsafe_code)]
19+
// SAFETY:
20+
// We just do book-keeping and then let another allocator do all the actual work.
21+
unsafe impl std::alloc::GlobalAlloc for TrackingAllocator {
22+
#[allow(clippy::let_and_return)]
23+
unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
24+
LIVE_BYTES_IN_THREAD.with(|bytes| bytes.fetch_add(layout.size(), Relaxed));
25+
26+
// SAFETY:
27+
// Just deferring
28+
unsafe { self.allocator.alloc(layout) }
29+
}
30+
31+
unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
32+
LIVE_BYTES_IN_THREAD.with(|bytes| bytes.fetch_sub(layout.size(), Relaxed));
33+
34+
// SAFETY:
35+
// Just deferring
36+
unsafe { self.allocator.dealloc(ptr, layout) };
37+
}
38+
}
39+
40+
fn live_bytes() -> usize {
41+
LIVE_BYTES_IN_THREAD.with(|bytes| bytes.load(Relaxed))
42+
}
43+
44+
/// Assumes all allocations are on the calling thread.
45+
///
46+
/// The reason we use thread-local counting is so that
47+
/// the counting won't be confused by any other running threads (e.g. other tests).
48+
fn memory_use<R>(run: impl Fn() -> R) -> usize {
49+
let used_bytes_start = live_bytes();
50+
let ret = run();
51+
let bytes_used = live_bytes() - used_bytes_start;
52+
drop(ret);
53+
bytes_used
54+
}
55+
56+
// ----------------------------------------------------------------------------
57+
58+
use re_data_store::{DataStore, DataStoreConfig};
59+
use re_log_types::{DataRow, RowId, TimePoint, TimeType, Timeline};
60+
use re_types::components::{InstanceKey, Scalar};
61+
use re_types_core::Loggable as _;
62+
63+
/// Measures how many bytes of memory the store spends per logged scalar.
#[test]
fn scalar_memory_overhead() {
    re_log::setup_logging();

    const NUM_SCALARS: usize = 1024 * 1024;

    let total_mem_use = memory_use(|| {
        let mut store = DataStore::new(
            re_log_types::StoreId::random(re_log_types::StoreKind::Recording),
            InstanceKey::name(),
            DataStoreConfig::default(),
        );

        for idx in 0..NUM_SCALARS {
            let timeline = Timeline::new("log_time", TimeType::Time);
            let timepoint = TimePoint::default().with(timeline, idx as i64);
            let row = DataRow::from_cells1_sized(
                RowId::new(),
                re_log_types::entity_path!("scalar"),
                timepoint,
                1, // num_instances
                vec![Scalar(idx as f64)],
            )
            .unwrap();
            store.insert_row(&row).unwrap();
        }

        // Return the store so its memory is part of what `memory_use` measures.
        store
    });

    insta::assert_debug_snapshot!(
        "scalars_on_one_timeline",
        [
            format!("{NUM_SCALARS} scalars"),
            format!("{} in total", re_format::format_bytes(total_mem_use as _)),
            format!(
                "{} per row",
                re_format::format_bytes(total_mem_use as f64 / NUM_SCALARS as f64)
            ),
        ]
    );
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
source: crates/re_data_store/tests/memory_test.rs
3+
assertion_line: 96
4+
expression: "[format!(\"{NUM_SCALARS} scalars\"),\n format!(\"{} in total\", re_format::format_bytes(total_mem_use as _)),\n format!(\"{} per row\",\n re_format::format_bytes(total_mem_use as f64 / NUM_SCALARS as\n f64))]"
5+
---
6+
[
7+
"1048576 scalars",
8+
"936 MiB in total",
9+
"936 B per row",
10+
]

pixi.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ meilisearch = "meilisearch --db-path=./meilisearch/data.ms --dump-dir=./meilisea
186186
download-design-tokens = "curl --fail https://rerun-docs.netlify.app/api/tokens | jq > crates/re_ui/data/design_tokens.json"
187187

188188
# Update the results of `insta` snapshot regression tests
189-
rs-update-insta-tests = "cargo test && cargo insta review"
189+
rs-update-insta-tests = "cargo test ; cargo insta review"
190190

191191
# Upload image to gcloud storage.
192192
upload-image = "python scripts/upload_image.py"

0 commit comments

Comments
 (0)