Skip to content

Commit 85f7578

Browse files
committed
create bench_cmp command
1 parent 8df1e5d commit 85f7578

File tree

4 files changed

+155
-6
lines changed

4 files changed

+155
-6
lines changed

Diff for: collector/src/bin/collector.rs

+20-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use std::{str, time::Instant};
1818
use anyhow::Context;
1919
use clap::builder::TypedValueParser;
2020
use clap::{Arg, Parser};
21+
use collector::compare::compare_artifacts;
2122
use humansize::{format_size, BINARY};
2223
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
2324
use tabled::builder::Builder;
@@ -628,6 +629,18 @@ enum Commands {
628629
#[command(flatten)]
629630
db: DbOption,
630631
},
632+
633+
/// Displays the diff between two local bench results.
634+
BenchCmp {
635+
#[command(flatten)]
636+
db: DbOption,
637+
638+
/// The name of the base artifact to be compared.
639+
base: String,
640+
641+
/// The name of the modified artifact to be compared.
642+
modified: String,
643+
},
631644
}
632645

633646
#[derive(Debug, clap::Parser)]
@@ -1187,6 +1200,13 @@ Make sure to modify `{dir}/perf-config.json` if the category/artifact don't matc
11871200
println!("Data of artifact {name} were removed");
11881201
Ok(0)
11891202
}
1203+
Commands::BenchCmp { db, base, modified } => {
1204+
let pool = Pool::open(&db.db);
1205+
let rt = build_async_runtime();
1206+
let conn = rt.block_on(pool.connection());
1207+
rt.block_on(compare_artifacts(conn, base, modified))?;
1208+
Ok(0)
1209+
}
11901210
}
11911211
}
11921212

@@ -1736,7 +1756,6 @@ fn bench_compile(
17361756
category,
17371757
));
17381758
print_intro();
1739-
17401759
let mut processor = BenchProcessor::new(
17411760
tx.conn(),
17421761
benchmark_name,

Diff for: collector/src/compare.rs

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
use database::{metric::Metric, Connection, Lookup};
2+
use tabled::{Table, Tabled};
3+
4+
/// The amount of relative change considered significant when
5+
/// we cannot determine it from historical data.
6+
pub const DEFAULT_SIGNIFICANCE_THRESHOLD: f64 = 0.002;
7+
8+
/// Compare 2 artifacts and print the result.
9+
pub async fn compare_artifacts(
10+
mut conn: Box<dyn Connection>,
11+
base: String,
12+
modified: String,
13+
) -> anyhow::Result<()> {
14+
let index = database::Index::load(&mut *conn).await;
15+
let sids = index
16+
.compile_statistic_descriptions()
17+
.filter(|(&(_, _, _, _, metric), _)| metric.as_str() == Metric::InstructionsUser.as_str())
18+
.map(|(_, sid)| sid)
19+
.collect::<Vec<_>>();
20+
21+
let base_id_number = conn
22+
.artifact_by_name(&base)
23+
.await
24+
.expect("Cannot find specified artifact")
25+
.lookup(&index)
26+
.unwrap();
27+
let modified_id_number = conn
28+
.artifact_by_name(&modified)
29+
.await
30+
.expect("Cannot find specified artifact")
31+
.lookup(&index)
32+
.unwrap();
33+
34+
let pstats = conn
35+
.get_pstats(&sids, &[Some(base_id_number), Some(modified_id_number)])
36+
.await;
37+
let tuple_pstats = pstats
38+
.into_iter()
39+
.map(|row| (row[0], row[1]))
40+
.collect::<Vec<_>>();
41+
#[derive(Tabled)]
42+
struct Regression {
43+
count: usize,
44+
#[tabled(display_with = "display_range")]
45+
range: (Option<f64>, Option<f64>),
46+
#[tabled(display_with = "display_mean")]
47+
mean: Option<f64>,
48+
}
49+
50+
fn format_value(value: Option<f64>) -> String {
51+
match value {
52+
Some(value) => format!("{:+.2}%", value),
53+
None => "-".to_string(),
54+
}
55+
}
56+
57+
fn display_range(&(min, max): &(Option<f64>, Option<f64>)) -> String {
58+
format!("[{}, {}]", &format_value(min), &format_value(max))
59+
}
60+
61+
fn display_mean(value: &Option<f64>) -> String {
62+
match value {
63+
Some(value) => format!("{:+.2}%", value),
64+
None => "-".to_string(),
65+
}
66+
}
67+
68+
impl From<&Vec<f64>> for Regression {
69+
fn from(value: &Vec<f64>) -> Self {
70+
let min = value.iter().copied().min_by(|a, b| a.total_cmp(b));
71+
let max = value.iter().copied().max_by(|a, b| a.total_cmp(b));
72+
let count = value.len();
73+
74+
Regression {
75+
range: (min, max),
76+
count,
77+
mean: if count == 0 {
78+
None
79+
} else {
80+
Some(value.iter().sum::<f64>() / count as f64)
81+
},
82+
}
83+
}
84+
}
85+
86+
let change = tuple_pstats
87+
.iter()
88+
.filter_map(|&(a, b)| match (a, b) {
89+
(Some(a), Some(b)) => {
90+
if a == 0.0 {
91+
None
92+
} else {
93+
Some((b - a) / a)
94+
}
95+
}
96+
(_, _) => None,
97+
})
98+
.filter(|c| c.abs() >= DEFAULT_SIGNIFICANCE_THRESHOLD)
99+
.collect::<Vec<_>>();
100+
println!("{}", change.len());
101+
let negative_change = change
102+
.iter()
103+
.copied()
104+
.filter(|&c| c < 0.0)
105+
.collect::<Vec<_>>();
106+
let positive_change = change
107+
.iter()
108+
.copied()
109+
.filter(|&c| c > 0.0)
110+
.collect::<Vec<_>>();
111+
112+
#[derive(Tabled)]
113+
struct NamedRegression {
114+
name: String,
115+
#[tabled(inline)]
116+
regression: Regression,
117+
}
118+
119+
let regressions = [negative_change, positive_change, change]
120+
.into_iter()
121+
.map(|c| Regression::from(&c))
122+
.zip(["❌", "✅", "✅, ❌"])
123+
.map(|(c, label)| NamedRegression {
124+
name: label.to_string(),
125+
regression: c,
126+
})
127+
.collect::<Vec<_>>();
128+
129+
println!("{}", Table::new(regressions));
130+
131+
Ok(())
132+
}

Diff for: collector/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub mod api;
99
pub mod artifact_stats;
1010
pub mod cargo;
1111
pub mod codegen;
12+
pub mod compare;
1213
pub mod compile;
1314
pub mod runtime;
1415
pub mod toolchain;

Diff for: site/src/comparison.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::api;
66
use crate::github;
77
use crate::load::SiteCtxt;
88

9+
use collector::compare::DEFAULT_SIGNIFICANCE_THRESHOLD;
910
use collector::compile::benchmark::category::Category;
1011
use collector::Bound;
1112
use database::{
@@ -1197,10 +1198,6 @@ pub struct TestResultComparison {
11971198
}
11981199

11991200
impl TestResultComparison {
1200-
/// The amount of relative change considered significant when
1201-
/// we cannot determine from historical data
1202-
const DEFAULT_SIGNIFICANCE_THRESHOLD: f64 = 0.002;
1203-
12041201
fn is_regression(&self) -> bool {
12051202
let (a, b) = self.results;
12061203
b > a
@@ -1220,7 +1217,7 @@ impl TestResultComparison {
12201217
self.historical_data
12211218
.as_ref()
12221219
.map(|d| d.significance_threshold())
1223-
.unwrap_or(Self::DEFAULT_SIGNIFICANCE_THRESHOLD)
1220+
.unwrap_or(DEFAULT_SIGNIFICANCE_THRESHOLD)
12241221
}
12251222

12261223
/// This is a numeric magnitude of a particular change.

0 commit comments

Comments
 (0)