Split bench_cmp's logic into src/compare.rs

s7tya · s7tya · commit d20abeb86eed · 2024-07-29T18:56:45.000+09:00
diff --git a/collector/src/bin/collector.rs b/collector/src/bin/collector.rs
@@ -18,13 +18,12 @@ use std::{str, time::Instant};
 use anyhow::Context;
 use clap::builder::TypedValueParser;
 use clap::{Arg, Parser};
+use collector::compare::compare_artifacts;
 use humansize::{format_size, BINARY};
-use itertools::Itertools;
 use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
 use tabled::builder::Builder;
 use tabled::settings::object::{Columns, Rows};
 use tabled::settings::{Alignment, Border, Color, Modify};
-use tabled::{Table, Tabled};
 use tokio::runtime::Runtime;
 
 use collector::api::next_artifact::NextArtifact;
@@ -54,7 +53,7 @@ use collector::toolchain::{
 use collector::utils::cachegrind::cachegrind_diff;
 use collector::utils::{is_installed, wait_for_future};
 use collector::{utils, CollectorCtx, CollectorStepBuilder};
-use database::{ArtifactId, ArtifactIdNumber, Commit, CommitType, Connection, Lookup, Pool};
+use database::{ArtifactId, ArtifactIdNumber, Commit, CommitType, Connection, Pool};
 
 fn n_normal_benchmarks_remaining(n: usize) -> String {
     let suffix = if n == 1 { "" } else { "s" };
@@ -637,10 +636,10 @@ enum Commands {
         db: DbOption,
 
         #[arg(long)]
-        a_id: String,
+        base: String,
 
         #[arg(long)]
-        b_id: String,
+        modified: String,
     },
 }
 
@@ -1201,111 +1200,11 @@ Make sure to modify `{dir}/perf-config.json` if the category/artifact don't matc
             println!("Data of artifact {name} were removed");
             Ok(0)
         }
-        Commands::BenchLocalDiff { db, a_id, b_id } => {
+        Commands::BenchLocalDiff { db, base, modified } => {
             let pool = Pool::open(&db.db);
             let rt = build_async_runtime();
-            let mut conn = rt.block_on(pool.connection());
-            let index = rt.block_on(database::Index::load(&mut *conn));
-
-            let sids = index
-                .compile_statistic_descriptions()
-                .map(|(_, sid)| sid)
-                .collect::<Vec<_>>();
-
-            let a_id_number = rt
-                .block_on(conn.artifact_by_name(&a_id))
-                .expect("Cannot find specified artifact")
-                .lookup(&index)
-                .unwrap();
-            let b_id_number = rt
-                .block_on(conn.artifact_by_name(&b_id))
-                .expect("Cannot find specified artifact")
-                .lookup(&index)
-                .unwrap();
-
-            let pstats =
-                rt.block_on(conn.get_pstats(&sids, &[Some(a_id_number), Some(b_id_number)]));
-            let tuple_pstats = pstats
-                .into_iter()
-                .map(|row| row.into_iter().collect_tuple::<(_, _)>().unwrap())
-                .collect::<Vec<(Option<f64>, Option<f64>)>>();
-
-            #[derive(Tabled)]
-            struct Regression {
-                count: usize,
-                #[tabled(display_with = "display_range")]
-                range: (f64, f64),
-                #[tabled(display_with = "display_mean")]
-                mean: f64,
-            }
-
-            fn display_range(value: &(f64, f64)) -> String {
-                format!("[{:+.2}%, {:+.2}%]", value.0, value.1)
-            }
-
-            fn display_mean(value: &f64) -> String {
-                format!("{:+.2}%", value)
-            }
-
-            impl From<&Vec<f64>> for Regression {
-                fn from(value: &Vec<f64>) -> Self {
-                    let min = *value.iter().min_by(|a, b| a.total_cmp(b)).unwrap();
-                    let max = *value.iter().max_by(|a, b| a.total_cmp(b)).unwrap();
-                    let count = value.len();
-
-                    Regression {
-                        range: (min, max),
-                        count,
-                        mean: (value.iter().sum::<f64>() / count as f64),
-                    }
-                }
-            }
-
-            const DEFAULT_SIGNIFICANCE_THRESHOLD: f64 = 0.002;
-            let change = tuple_pstats
-                .iter()
-                .filter_map(|&(a, b)| match (a, b) {
-                    (Some(a), Some(b)) => {
-                        if a == 0.0 {
-                            None
-                        } else {
-                            Some((b - a) / a)
-                        }
-                    }
-                    (_, _) => None,
-                })
-                .filter(|c| c.abs() >= DEFAULT_SIGNIFICANCE_THRESHOLD * 100.0)
-                .collect::<Vec<_>>();
-            let negative_change = change
-                .iter()
-                .copied()
-                .filter(|&c| c < 0.0)
-                .collect::<Vec<_>>();
-            let positive_change = change
-                .iter()
-                .copied()
-                .filter(|&c| c > 0.0)
-                .collect::<Vec<_>>();
-
-            #[derive(Tabled)]
-            struct NamedRegression {
-                name: String,
-                #[tabled(inline)]
-                regression: Regression,
-            }
-
-            let regressions = [negative_change, positive_change, change]
-                .into_iter()
-                .map(|c| Regression::from(&c))
-                .zip(["❌", "✅", "✅, ❌"])
-                .map(|(c, label)| NamedRegression {
-                    name: label.to_string(),
-                    regression: c,
-                })
-                .collect::<Vec<_>>();
-
-            println!("{}", Table::new(regressions));
-
+            let conn = rt.block_on(pool.connection());
+            rt.block_on(compare_artifacts(conn, base, modified));
             Ok(0)
         }
     }
diff --git a/collector/src/compare.rs b/collector/src/compare.rs
@@ -0,0 +1,129 @@
+use database::{Connection, Lookup};
+use itertools::Itertools;
+use tabled::{Table, Tabled};
+
+/// Minimum relative change, as a fraction (0.002 = 0.2%), that is considered significant.
+const DEFAULT_SIGNIFICANCE_THRESHOLD: f64 = 0.002;
+
+/// Compare 2 artifacts and print the result.
+///
+/// Fetches the statistics of `base` and `modified` for every compile statistic description
+/// in the index, computes the relative change of each pair in percent, drops changes below
+/// [`DEFAULT_SIGNIFICANCE_THRESHOLD`] and prints a summary table to stdout.
+///
+/// # Panics
+///
+/// Panics if either artifact cannot be found by name or is missing from the index.
+pub async fn compare_artifacts(mut conn: Box<dyn Connection>, base: String, modified: String) {
+    let index = database::Index::load(&mut *conn).await;
+    let sids = index
+        .compile_statistic_descriptions()
+        .map(|(_, sid)| sid)
+        .collect::<Vec<_>>();
+
+    let base_id_number = conn
+        .artifact_by_name(&base)
+        .await
+        .expect("Cannot find specified artifact")
+        .lookup(&index)
+        .unwrap();
+    let modified_id_number = conn
+        .artifact_by_name(&modified)
+        .await
+        .expect("Cannot find specified artifact")
+        .lookup(&index)
+        .unwrap();
+
+    let pstats = conn
+        .get_pstats(&sids, &[Some(base_id_number), Some(modified_id_number)])
+        .await;
+    // Two artifacts were requested, so each row should hold exactly two values:
+    // `(base, modified)`.
+    let tuple_pstats = pstats
+        .into_iter()
+        .map(|row| row.into_iter().collect_tuple::<(_, _)>().unwrap())
+        .collect::<Vec<(Option<f64>, Option<f64>)>>();
+
+    // Summary of a set of relative changes, all expressed in percent.
+    #[derive(Tabled)]
+    struct Regression {
+        count: usize,
+        // Smallest and largest change; `None` when the set is empty.
+        #[tabled(display_with = "display_range")]
+        range: Option<(f64, f64)>,
+        // Arithmetic mean of the changes; `None` when the set is empty.
+        #[tabled(display_with = "display_mean")]
+        mean: Option<f64>,
+    }
+
+    // `display_with` functions receive a reference to the field, hence `&Option<_>`.
+    fn display_range(value: &Option<(f64, f64)>) -> String {
+        match value {
+            Some((min, max)) => format!("[{:+.2}%, {:+.2}%]", min, max),
+            None => "-".to_string(),
+        }
+    }
+
+    fn display_mean(value: &Option<f64>) -> String {
+        match value {
+            Some(mean) => format!("{:+.2}%", mean),
+            None => "-".to_string(),
+        }
+    }
+
+    impl From<&[f64]> for Regression {
+        fn from(value: &[f64]) -> Self {
+            let count = value.len();
+            // `min_by`/`max_by` return `None` for an empty slice instead of panicking.
+            let min = value.iter().copied().min_by(f64::total_cmp);
+            let max = value.iter().copied().max_by(f64::total_cmp);
+
+            Regression {
+                count,
+                range: min.zip(max),
+                // Avoid a 0/0 division (NaN) when there is nothing to average.
+                mean: (count > 0).then(|| value.iter().sum::<f64>() / count as f64),
+            }
+        }
+    }
+
+    // Work in percent throughout so that the threshold, the filtered values and the `%`
+    // suffix used when printing all agree.
+    let threshold_percent = DEFAULT_SIGNIFICANCE_THRESHOLD * 100.0;
+    let change = tuple_pstats
+        .iter()
+        .filter_map(|&(a, b)| match (a, b) {
+            // A zero baseline has no meaningful relative change.
+            (Some(a), Some(b)) if a != 0.0 => Some((b - a) / a * 100.0),
+            _ => None,
+        })
+        .filter(|c| c.abs() >= threshold_percent)
+        .collect::<Vec<_>>();
+    // Zero and NaN changes never pass the threshold filter, so everything that is not
+    // positive here is strictly negative.
+    let (positive_change, negative_change): (Vec<f64>, Vec<f64>) =
+        change.iter().copied().partition(|&c| c > 0.0);
+
+    #[derive(Tabled)]
+    struct NamedRegression {
+        name: String,
+        #[tabled(inline)]
+        regression: Regression,
+    }
+
+    // NOTE(review): assumes lower is better for every statistic, so an increase is a
+    // regression (❌) and a decrease is an improvement (✅) — confirm.
+    let regressions = [
+        ("❌", positive_change.as_slice()),
+        ("✅", negative_change.as_slice()),
+        ("✅, ❌", change.as_slice()),
+    ]
+    .into_iter()
+    .map(|(label, changes)| NamedRegression {
+        name: label.to_string(),
+        regression: Regression::from(changes),
+    })
+    .collect::<Vec<_>>();
+
+    println!("{}", Table::new(regressions));
+}
diff --git a/collector/src/lib.rs b/collector/src/lib.rs
@@ -9,6 +9,7 @@ pub mod api;
 pub mod artifact_stats;
 pub mod cargo;
 pub mod codegen;
+pub mod compare;
 pub mod compile;
 pub mod runtime;
 pub mod toolchain;