create bench_cmp command

s7tya · s7tya · commit 85f7578999c7 · 2024-08-06T16:36:16.000+09:00
diff --git a/collector/src/bin/collector.rs b/collector/src/bin/collector.rs
@@ -18,6 +18,7 @@ use std::{str, time::Instant};
 use anyhow::Context;
 use clap::builder::TypedValueParser;
 use clap::{Arg, Parser};
+use collector::compare::compare_artifacts;
 use humansize::{format_size, BINARY};
 use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
 use tabled::builder::Builder;
@@ -628,6 +629,18 @@ enum Commands {
         #[command(flatten)]
         db: DbOption,
     },
+
+    /// Displays the diff between two local bench results.
+    BenchCmp {
+        #[command(flatten)]
+        db: DbOption,
+
+        /// The name of the base artifact to be compared.
+        base: String,
+
+        /// The name of the modified artifact to be compared.
+        modified: String,
+    },
 }
 
 #[derive(Debug, clap::Parser)]
@@ -1187,6 +1200,13 @@ Make sure to modify `{dir}/perf-config.json` if the category/artifact don't matc
             println!("Data of artifact {name} were removed");
             Ok(0)
         }
+        Commands::BenchCmp { db, base, modified } => {
+            let pool = Pool::open(&db.db);
+            let rt = build_async_runtime();
+            let conn = rt.block_on(pool.connection());
+            rt.block_on(compare_artifacts(conn, base, modified))?;
+            Ok(0)
+        }
     }
 }
 
@@ -1736,7 +1756,6 @@ fn bench_compile(
                 category,
             ));
             print_intro();
-
             let mut processor = BenchProcessor::new(
                 tx.conn(),
                 benchmark_name,
diff --git a/collector/src/compare.rs b/collector/src/compare.rs
@@ -0,0 +1,132 @@
+use database::{metric::Metric, Connection, Lookup};
+use tabled::{Table, Tabled};
+
+/// The relative change, as a fraction (0.002 is 0.2%), that is considered
+/// significant when we cannot determine a threshold from historical data.
+pub const DEFAULT_SIGNIFICANCE_THRESHOLD: f64 = 0.002;
+
+/// Compares two artifacts stored in the database and prints a summary table.
+///
+/// For every compile statistic series measuring `Metric::InstructionsUser`,
+/// the relative change `(modified - base) / base` is computed. Changes whose
+/// magnitude is below `DEFAULT_SIGNIFICANCE_THRESHOLD` are ignored. The
+/// printed table has one row for regressions (❌, the value went up), one for
+/// improvements (✅, the value went down) and one for all significant changes.
+///
+/// Percentages in the table are relative to `base`.
+///
+/// # Errors
+///
+/// Returns an error if either artifact name cannot be found in the database
+/// or cannot be resolved through the loaded index.
+pub async fn compare_artifacts(
+    mut conn: Box<dyn Connection>,
+    base: String,
+    modified: String,
+) -> anyhow::Result<()> {
+    let index = database::Index::load(&mut *conn).await;
+    let sids = index
+        .compile_statistic_descriptions()
+        .filter(|(&(_, _, _, _, metric), _)| metric.as_str() == Metric::InstructionsUser.as_str())
+        .map(|(_, sid)| sid)
+        .collect::<Vec<_>>();
+
+    // Resolve both names up front so that a mistyped artifact name is
+    // reported as an error instead of a panic.
+    let mut artifact_ids = Vec::with_capacity(2);
+    for name in [&base, &modified] {
+        let id = conn
+            .artifact_by_name(name)
+            .await
+            .ok_or_else(|| anyhow::anyhow!("Cannot find specified artifact `{}`", name))?
+            .lookup(&index)
+            .ok_or_else(|| anyhow::anyhow!("Artifact `{}` is not in the index", name))?;
+        artifact_ids.push(Some(id));
+    }
+
+    // Each row is read as (base, modified), matching the order of `artifact_ids`.
+    let change = conn
+        .get_pstats(&sids, &artifact_ids)
+        .await
+        .into_iter()
+        .filter_map(|row| match (row[0], row[1]) {
+            // A zero baseline has no meaningful relative change.
+            (Some(a), Some(b)) if a != 0.0 => Some((b - a) / a),
+            _ => None,
+        })
+        .filter(|c| c.abs() >= DEFAULT_SIGNIFICANCE_THRESHOLD)
+        .collect::<Vec<f64>>();
+
+    // No change is exactly zero after the threshold filter, so everything
+    // that is not positive is negative.
+    let (positive_change, negative_change): (Vec<f64>, Vec<f64>) =
+        change.iter().copied().partition(|&c| c > 0.0);
+
+    // A positive change means `modified` has a higher instruction count than
+    // `base`, i.e. a regression (❌); a negative change is an improvement (✅).
+    let rows = [
+        ("❌", positive_change),
+        ("✅", negative_change),
+        ("✅, ❌", change),
+    ]
+    .into_iter()
+    .map(|(label, changes)| NamedRegression {
+        name: label.to_string(),
+        regression: Regression::from(changes.as_slice()),
+    })
+    .collect::<Vec<_>>();
+
+    println!("{}", Table::new(rows));
+
+    Ok(())
+}
+
+/// Summary statistics for one group of relative changes, stored as fractions.
+#[derive(Tabled)]
+struct Regression {
+    count: usize,
+    #[tabled(display_with = "display_range")]
+    range: (Option<f64>, Option<f64>),
+    #[tabled(display_with = "display_percent")]
+    mean: Option<f64>,
+}
+
+/// A table row: a label plus the statistics of the group it describes.
+#[derive(Tabled)]
+struct NamedRegression {
+    name: String,
+    #[tabled(inline)]
+    regression: Regression,
+}
+
+/// Formats a fractional change as a signed percentage, or `-` when absent.
+///
+/// `value` is a fraction, e.g. `0.01` is rendered as `+1.00%`.
+fn display_percent(value: &Option<f64>) -> String {
+    match value {
+        // Changes are stored as fractions, so scale them to percent.
+        Some(value) => format!("{:+.2}%", value * 100.0),
+        None => "-".to_string(),
+    }
+}
+
+/// Formats a `(min, max)` pair of fractional changes as `[min, max]`.
+fn display_range((min, max): &(Option<f64>, Option<f64>)) -> String {
+    format!("[{}, {}]", display_percent(min), display_percent(max))
+}
+
+impl From<&[f64]> for Regression {
+    /// Computes the count, range and mean of `changes`.
+    fn from(changes: &[f64]) -> Self {
+        let count = changes.len();
+        Regression {
+            count,
+            range: (
+                changes.iter().copied().min_by(f64::total_cmp),
+                changes.iter().copied().max_by(f64::total_cmp),
+            ),
+            // The mean of an empty group is undefined, so report `None`.
+            mean: (count > 0).then(|| changes.iter().sum::<f64>() / count as f64),
+        }
+    }
+}
diff --git a/collector/src/lib.rs b/collector/src/lib.rs
@@ -9,6 +9,7 @@ pub mod api;
 pub mod artifact_stats;
 pub mod cargo;
 pub mod codegen;
+pub mod compare;
 pub mod compile;
 pub mod runtime;
 pub mod toolchain;
diff --git a/site/src/comparison.rs b/site/src/comparison.rs
@@ -6,6 +6,7 @@ use crate::api;
 use crate::github;
 use crate::load::SiteCtxt;
 
+use collector::compare::DEFAULT_SIGNIFICANCE_THRESHOLD;
 use collector::compile::benchmark::category::Category;
 use collector::Bound;
 use database::{
@@ -1197,10 +1198,6 @@ pub struct TestResultComparison {
 }
 
 impl TestResultComparison {
-    /// The amount of relative change considered significant when
-    /// we cannot determine from historical data
-    const DEFAULT_SIGNIFICANCE_THRESHOLD: f64 = 0.002;
-
     fn is_regression(&self) -> bool {
         let (a, b) = self.results;
         b > a
@@ -1220,7 +1217,7 @@ impl TestResultComparison {
         self.historical_data
             .as_ref()
             .map(|d| d.significance_threshold())
-            .unwrap_or(Self::DEFAULT_SIGNIFICANCE_THRESHOLD)
+            .unwrap_or(DEFAULT_SIGNIFICANCE_THRESHOLD)
     }
 
     /// This is a numeric magnitude of a particular change.