From c614dfc0d4eadcd19b188ff9c7e226c138f894a1 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Wed, 12 Sep 2018 16:26:17 +0300
Subject: [PATCH] *Display* aggregates only. (#665)

There is a flag
https://github.com/google/benchmark/blob/d9cab612e40017af10bddaa5b60c7067032a9e1c/src/benchmark.cc#L75-L78
and a call
https://github.com/google/benchmark/blob/d9cab612e40017af10bddaa5b60c7067032a9e1c/include/benchmark/benchmark.h#L837-L840
But that affects everything, every reporter and destination:
https://github.com/google/benchmark/blob/d9cab612e40017af10bddaa5b60c7067032a9e1c/src/benchmark.cc#L316
It would be quite useful to have the ability to be more picky. More specifically, I would like to be able to see only the aggregates in the on-screen output, but for the file output to still contain everything. The former is useful in case of a lot of repetitions (or even more so if every iteration is reported separately), while the latter is **great** for tooling.

Fixes https://github.com/google/benchmark/issues/664
---
 README.md | 20 +++++--
 include/benchmark/benchmark.h | 22 ++++++-
 src/benchmark.cc | 90 +++++++++++++++++++++-------
 src/benchmark_register.cc | 16 +++++
 test/CMakeLists.txt | 6 ++
 test/display_aggregates_only_test.cc | 40 +++++++++++++
 test/output_test.h | 7 +++
 test/output_test_helper.cc | 37 ++++++++++++
 test/report_aggregates_only_test.cc | 36 +++++++++++
 test/reporter_output_test.cc | 27 +++++++++
 10 files changed, 270 insertions(+), 31 deletions(-)
 create mode 100644 test/display_aggregates_only_test.cc
 create mode 100644 test/report_aggregates_only_test.cc

diff --git a/README.md b/README.md
index 2f2d78f6ef..38203958f1 100644
--- a/README.md
+++ b/README.md
@@ -545,12 +545,20 @@
 The number of runs of each benchmark is specified globally by the
 `--benchmark_repetitions` flag or on a per benchmark basis by calling
 `Repetitions` on the registered benchmark object. When a benchmark is run more
 than once the mean, median and standard deviation of the runs will be reported.
-Additionally the `--benchmark_report_aggregates_only={true|false}` flag or
-`ReportAggregatesOnly(bool)` function can be used to change how repeated tests
-are reported. By default the result of each repeated run is reported. When this
-option is `true` only the mean, median and standard deviation of the runs is reported.
-Calling `ReportAggregatesOnly(bool)` on a registered benchmark object overrides
-the value of the flag for that benchmark.
+Additionally the `--benchmark_report_aggregates_only={true|false}` and
+`--benchmark_display_aggregates_only={true|false}` flags, or the
+`ReportAggregatesOnly(bool)` and `DisplayAggregatesOnly(bool)` functions, can be
+used to change how repeated tests are reported. By default the result of each
+repeated run is reported. When the `report aggregates only` option is `true`,
+only the aggregates (i.e. mean, median and standard deviation, plus complexity
+measurements if they were requested) of the runs are reported, to both
+reporters: the standard output (console) and the file.
+However, when only the `display aggregates only` option is `true`,
+only the aggregates are displayed in the standard output, while the file
+output still contains everything.
+Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a
+registered benchmark object overrides the value of the appropriate flag for that
+benchmark.
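For illustration, here is a minimal sketch of how the new per-benchmark knob could be used. This snippet is not part of the patch; `BM_Something` is a hypothetical benchmark, and the `DisplayAggregatesOnly()` call is the method this patch adds:

```c++
#include "benchmark/benchmark.h"

// Hypothetical benchmark, for illustration only.
static void BM_Something(benchmark::State& state) {
  for (auto _ : state) {
    // ... code being measured ...
  }
}

// Run 10 repetitions. The console (display reporter) shows only the
// BM_Something/repeats:10_mean/_median/_stddev rows, while the file passed
// via --benchmark_out=<file> still receives every individual repetition.
BENCHMARK(BM_Something)->Repetitions(10)->DisplayAggregatesOnly();

BENCHMARK_MAIN();
```

The same effect can be requested globally, without touching the registration, by passing `--benchmark_display_aggregates_only=true` on the command line.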
## User-defined statistics for repeated benchmarks While having mean, median and standard deviation is nice, this may not be diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 591b45c36b..b7a89988cb 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -438,9 +438,21 @@ enum AggregationReportMode : unsigned #else #endif -{ ARM_Unspecified, // The mode has not been manually specified - ARM_Default, // The mode is user-specified as default. - ARM_ReportAggregatesOnly }; +{ + // The mode has not been manually specified + ARM_Unspecified = 0, + // The mode is user-specified. + // This may or may not be set when the following bit-flags are set. + ARM_Default = 1U << 0U, + // File reporter should only output aggregates. + ARM_FileReportAggregatesOnly = 1U << 1U, + // Display reporter should only output aggregates + ARM_DisplayReportAggregatesOnly = 1U << 2U, + // Both reporters should only display aggregates. + ARM_ReportAggregatesOnly = + ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly +}; + } // namespace internal // State is passed to a running Benchmark and contains state for the @@ -862,8 +874,12 @@ class Benchmark { // Specify if each repetition of the benchmark should be reported separately // or if only the final statistics should be reported. If the benchmark // is not repeated then the single result is always reported. + // Applies to *ALL* reporters (display and file). Benchmark* ReportAggregatesOnly(bool value = true); + // Same as ReportAggregatesOnly(), but applies to display reporter only. + Benchmark* DisplayAggregatesOnly(bool value = true); + // If a particular benchmark is I/O bound, runs multiple threads internally or // if for some reason CPU timings are not representative, call this method. If // called, the elapsed time will be used to control how many iterations are diff --git a/src/benchmark.cc b/src/benchmark.cc index c3f3a6d144..0c92ae6aa9 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -34,6 +34,7 @@ #include #include #include +#include #include "check.h" #include "colorprint.h" @@ -72,10 +73,19 @@ DEFINE_int32(benchmark_repetitions, 1, "The number of runs of each benchmark. If greater than 1, the " "mean and standard deviation of the runs will be reported."); -DEFINE_bool(benchmark_report_aggregates_only, false, - "Report the result of each benchmark repetitions. When 'true' is " - "specified only the mean, standard deviation, and other statistics " - "are reported for repeated benchmarks."); +DEFINE_bool( + benchmark_report_aggregates_only, false, + "Report the result of each benchmark repetitions. When 'true' is specified " + "only the mean, standard deviation, and other statistics are reported for " + "repeated benchmarks. Affects all reporters."); + +DEFINE_bool( + benchmark_display_aggregates_only, false, + "Display the result of each benchmark repetitions. When 'true' is " + "specified only the mean, standard deviation, and other statistics are " + "displayed for repeated benchmarks. Unlike " + "benchmark_report_aggregates_only, only affects the display reporter, but " + "*NOT* file reporter, which will still contain all the output."); DEFINE_string(benchmark_format, "console", "The format to use for console output. 
Valid values are " @@ -193,10 +203,18 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, manager->NotifyThreadComplete(); } -std::vector RunBenchmark( +struct RunResults { + std::vector non_aggregates; + std::vector aggregates_only; + + bool display_report_aggregates_only = false; + bool file_report_aggregates_only = false; +}; + +RunResults RunBenchmark( const benchmark::internal::Benchmark::Instance& b, std::vector* complexity_reports) { - std::vector reports; // return value + RunResults run_results; const bool has_explicit_iteration_count = b.iterations != 0; size_t iters = has_explicit_iteration_count ? b.iterations : 1; @@ -204,11 +222,20 @@ std::vector RunBenchmark( std::vector pool(b.threads - 1); const int repeats = b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions; - const bool report_aggregates_only = - repeats != 1 && - (b.aggregation_report_mode == internal::ARM_Unspecified - ? FLAGS_benchmark_report_aggregates_only - : b.aggregation_report_mode == internal::ARM_ReportAggregatesOnly); + if (repeats != 1) { + run_results.display_report_aggregates_only = + (FLAGS_benchmark_report_aggregates_only || + FLAGS_benchmark_display_aggregates_only); + run_results.file_report_aggregates_only = + FLAGS_benchmark_report_aggregates_only; + if (b.aggregation_report_mode != internal::ARM_Unspecified) { + run_results.display_report_aggregates_only = + (b.aggregation_report_mode & + internal::ARM_DisplayReportAggregatesOnly); + run_results.file_report_aggregates_only = + (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); + } + } for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { for (;;) { // Try benchmark @@ -281,7 +308,7 @@ std::vector RunBenchmark( b, results, memory_iterations, memory_result, seconds); if (!report.error_occurred && b.complexity != oNone) complexity_reports->push_back(report); - reports.push_back(report); + run_results.non_aggregates.push_back(report); break; } @@ -304,18 +331,20 @@ std::vector RunBenchmark( iters = static_cast(next_iters + 0.5); } } + // Calculate additional statistics - auto stat_reports = ComputeStats(reports); + run_results.aggregates_only = ComputeStats(run_results.non_aggregates); + + // Maybe calculate complexity report if ((b.complexity != oNone) && b.last_benchmark_instance) { auto additional_run_stats = ComputeBigO(*complexity_reports); - stat_reports.insert(stat_reports.end(), additional_run_stats.begin(), - additional_run_stats.end()); + run_results.aggregates_only.insert(run_results.aggregates_only.end(), + additional_run_stats.begin(), + additional_run_stats.end()); complexity_reports->clear(); } - if (report_aggregates_only) reports.clear(); - reports.insert(reports.end(), stat_reports.begin(), stat_reports.end()); - return reports; + return run_results; } } // namespace @@ -462,11 +491,25 @@ void RunBenchmarks(const std::vector& benchmarks, (!file_reporter || file_reporter->ReportContext(context))) { flushStreams(display_reporter); flushStreams(file_reporter); + for (const auto& benchmark : benchmarks) { - std::vector reports = - RunBenchmark(benchmark, &complexity_reports); - display_reporter->ReportRuns(reports); - if (file_reporter) file_reporter->ReportRuns(reports); + RunResults run_results = RunBenchmark(benchmark, &complexity_reports); + + auto report = [&run_results](BenchmarkReporter* reporter, + bool report_aggregates_only) { + assert(reporter); + assert( + !(report_aggregates_only && run_results.aggregates_only.empty())); + if (!report_aggregates_only) + 
reporter->ReportRuns(run_results.non_aggregates); + if (!run_results.aggregates_only.empty()) + reporter->ReportRuns(run_results.aggregates_only); + }; + + report(display_reporter, run_results.display_report_aggregates_only); + if (file_reporter) + report(file_reporter, run_results.file_report_aggregates_only); + flushStreams(display_reporter); flushStreams(file_reporter); } @@ -596,6 +639,7 @@ void PrintUsageAndExit() { " [--benchmark_min_time=]\n" " [--benchmark_repetitions=]\n" " [--benchmark_report_aggregates_only={true|false}]\n" + " [--benchmark_display_aggregates_only={true|false}]\n" " [--benchmark_format=]\n" " [--benchmark_out=]\n" " [--benchmark_out_format=]\n" @@ -619,6 +663,8 @@ void ParseCommandLineFlags(int* argc, char** argv) { &FLAGS_benchmark_repetitions) || ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", &FLAGS_benchmark_report_aggregates_only) || + ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", + &FLAGS_benchmark_display_aggregates_only) || ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) || ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) || ParseStringFlag(argv[i], "benchmark_out_format", diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 8ee30adf0f..ffe97a1416 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -373,6 +373,22 @@ Benchmark* Benchmark::ReportAggregatesOnly(bool value) { return this; } +Benchmark* Benchmark::DisplayAggregatesOnly(bool value) { + // If we were called, the report mode is no longer 'unspecified', in any case. + aggregation_report_mode_ = static_cast( + aggregation_report_mode_ | ARM_Default); + + if (value) { + aggregation_report_mode_ = static_cast( + aggregation_report_mode_ | ARM_DisplayReportAggregatesOnly); + } else { + aggregation_report_mode_ = static_cast( + aggregation_report_mode_ & ~ARM_DisplayReportAggregatesOnly); + } + + return this; +} + Benchmark* Benchmark::UseRealTime() { CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index aa2058adce..4269991755 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -125,6 +125,12 @@ add_test(templated_fixture_test templated_fixture_test --benchmark_min_time=0.01 compile_output_test(user_counters_test) add_test(user_counters_test user_counters_test --benchmark_min_time=0.01) +compile_output_test(report_aggregates_only_test) +add_test(report_aggregates_only_test report_aggregates_only_test --benchmark_min_time=0.01) + +compile_output_test(display_aggregates_only_test) +add_test(display_aggregates_only_test display_aggregates_only_test --benchmark_min_time=0.01) + compile_output_test(user_counters_tabular_test) add_test(user_counters_tabular_test user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01) diff --git a/test/display_aggregates_only_test.cc b/test/display_aggregates_only_test.cc new file mode 100644 index 0000000000..46ab2e310f --- /dev/null +++ b/test/display_aggregates_only_test.cc @@ -0,0 +1,40 @@ + +#undef NDEBUG +#include +#include + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// Ok this test is super ugly. We want to check what happens with the file +// reporter in the presence of DisplayAggregatesOnly(). +// We do not care about console output, the normal tests check that already. 
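As an aside, the interplay between the new bit-flag `AggregationReportMode` values, the two global flags, and the per-benchmark methods can be condensed into a small standalone sketch. This is not part of the patch: the enum mirrors the values added to include/benchmark/benchmark.h, and `ResolveReportModes` is a hypothetical helper that restates the `if (repeats != 1)` logic added to `RunBenchmark()` in src/benchmark.cc:

```c++
#include <cassert>

// Mirror of the bit-flag enum this patch adds to include/benchmark/benchmark.h.
enum AggregationReportMode : unsigned {
  ARM_Unspecified = 0,
  ARM_Default = 1U << 0U,
  ARM_FileReportAggregatesOnly = 1U << 1U,
  ARM_DisplayReportAggregatesOnly = 1U << 2U,
  ARM_ReportAggregatesOnly =
      ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};

struct ReportModes {
  bool display_aggregates_only = false;
  bool file_aggregates_only = false;
};

// Hypothetical helper condensing the `if (repeats != 1)` logic from
// RunBenchmark(): the global flags provide the defaults, an explicit
// per-benchmark mode overrides them, and a single run is always reported in full.
ReportModes ResolveReportModes(int repeats,
                               AggregationReportMode per_benchmark_mode,
                               bool flag_report_aggregates_only,
                               bool flag_display_aggregates_only) {
  ReportModes modes;
  if (repeats == 1) return modes;
  modes.display_aggregates_only =
      flag_report_aggregates_only || flag_display_aggregates_only;
  modes.file_aggregates_only = flag_report_aggregates_only;
  if (per_benchmark_mode != ARM_Unspecified) {
    modes.display_aggregates_only =
        (per_benchmark_mode & ARM_DisplayReportAggregatesOnly) != 0;
    modes.file_aggregates_only =
        (per_benchmark_mode & ARM_FileReportAggregatesOnly) != 0;
  }
  return modes;
}

int main() {
  // DisplayAggregatesOnly() sets ARM_Default | ARM_DisplayReportAggregatesOnly:
  // the console sees aggregates only, the file still gets every repetition.
  ReportModes m = ResolveReportModes(
      /*repeats=*/3,
      static_cast<AggregationReportMode>(ARM_Default |
                                         ARM_DisplayReportAggregatesOnly),
      /*flag_report_aggregates_only=*/false,
      /*flag_display_aggregates_only=*/false);
  assert(m.display_aggregates_only && !m.file_aggregates_only);

  // ReportAggregatesOnly() sets both bits, so both reporters get aggregates only.
  m = ResolveReportModes(
      3,
      static_cast<AggregationReportMode>(ARM_Default | ARM_ReportAggregatesOnly),
      false, false);
  assert(m.display_aggregates_only && m.file_aggregates_only);
  return 0;
}
```

In particular, `ARM_ReportAggregatesOnly` is now simply the combination of the two single-reporter bits, which is why the pre-existing `ReportAggregatesOnly()` behaviour is preserved.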
+ +void BM_SummaryRepeat(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->DisplayAggregatesOnly(); + +int main(int argc, char* argv[]) { + const std::string output = GetFileReporterOutput(argc, argv); + + if (SubstrCnt(output, "BM_SummaryRepeat/repeats:3") != 6 || + SubstrCnt(output, "\"BM_SummaryRepeat/repeats:3\"") != 3 || + SubstrCnt(output, "\"BM_SummaryRepeat/repeats:3_mean\"") != 1 || + SubstrCnt(output, "\"BM_SummaryRepeat/repeats:3_median\"") != 1 || + SubstrCnt(output, "\"BM_SummaryRepeat/repeats:3_stddev\"") != 1) { + std::cout << "Precondition mismatch. Expected to only find 6 " + "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" + "\"BM_SummaryRepeat/repeats:3\", " + "\"BM_SummaryRepeat/repeats:3\", " + "\"BM_SummaryRepeat/repeats:3\", " + "\"BM_SummaryRepeat/repeats:3_mean\", " + "\"BM_SummaryRepeat/repeats:3_median\", " + "\"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire output:\n"; + std::cout << output; + return 1; + } + + return 0; +} diff --git a/test/output_test.h b/test/output_test.h index 31a919991f..9385761b21 100644 --- a/test/output_test.h +++ b/test/output_test.h @@ -60,6 +60,13 @@ int SetSubstitutions( // Run all output tests. void RunOutputTests(int argc, char* argv[]); +// Count the number of 'pat' substrings in the 'haystack' string. +int SubstrCnt(const std::string& haystack, const std::string& pat); + +// Run registered benchmarks with file reporter enabled, and return the content +// outputted by the file reporter. +std::string GetFileReporterOutput(int argc, char* argv[]); + // ========================================================================= // // ------------------------- Results checking ------------------------------ // // ========================================================================= // diff --git a/test/output_test_helper.cc b/test/output_test_helper.cc index 394c4f5d1a..f84bd34521 100644 --- a/test/output_test_helper.cc +++ b/test/output_test_helper.cc @@ -1,8 +1,11 @@ +#include #include +#include #include #include #include #include +#include #include "../src/benchmark_api_internal.h" #include "../src/check.h" // NOTE: check.h is for internal use only! @@ -423,3 +426,37 @@ void RunOutputTests(int argc, char* argv[]) { CHECK(std::strcmp(csv.name, "CSVReporter") == 0); internal::GetResultsChecker().CheckResults(csv.out_stream); } + +int SubstrCnt(const std::string& haystack, const std::string& pat) { + if (pat.length() == 0) return 0; + int count = 0; + for (size_t offset = haystack.find(pat); offset != std::string::npos; + offset = haystack.find(pat, offset + pat.length())) + ++count; + return count; +} + +std::string GetFileReporterOutput(int argc, char* argv[]) { + std::vector new_argv(argv, argv + argc); + assert(static_cast(argc) == new_argv.size()); + + std::string tmp_file_name = std::tmpnam(nullptr); + std::cout << "Will be using this as the tmp file: " << tmp_file_name << '\n'; + + std::string tmp = "--benchmark_out="; + tmp += tmp_file_name; + new_argv.emplace_back(const_cast(tmp.c_str())); + + argc = int(new_argv.size()); + + benchmark::Initialize(&argc, new_argv.data()); + benchmark::RunSpecifiedBenchmarks(); + + // Read the output back from the file, and delete the file. 
+ std::ifstream tmp_stream(tmp_file_name); + std::string output = std::string((std::istreambuf_iterator(tmp_stream)), + std::istreambuf_iterator()); + std::remove(tmp_file_name.c_str()); + + return output; +} diff --git a/test/report_aggregates_only_test.cc b/test/report_aggregates_only_test.cc new file mode 100644 index 0000000000..8eb8bde2e2 --- /dev/null +++ b/test/report_aggregates_only_test.cc @@ -0,0 +1,36 @@ + +#undef NDEBUG +#include +#include + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// Ok this test is super ugly. We want to check what happens with the file +// reporter in the presence of ReportAggregatesOnly(). +// We do not care about console output, the normal tests check that already. + +void BM_SummaryRepeat(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); + +int main(int argc, char* argv[]) { + const std::string output = GetFileReporterOutput(argc, argv); + + if (SubstrCnt(output, "BM_SummaryRepeat/repeats:3") != 3 || + SubstrCnt(output, "\"BM_SummaryRepeat/repeats:3_mean\"") != 1 || + SubstrCnt(output, "\"BM_SummaryRepeat/repeats:3_median\"") != 1 || + SubstrCnt(output, "\"BM_SummaryRepeat/repeats:3_stddev\"") != 1) { + std::cout << "Precondition mismatch. Expected to only find three " + "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" + "\"BM_SummaryRepeat/repeats:3_mean\", " + "\"BM_SummaryRepeat/repeats:3_median\", " + "\"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire output:\n"; + std::cout << output; + return 1; + } + + return 0; +} diff --git a/test/reporter_output_test.cc b/test/reporter_output_test.cc index a98fc42a45..8045cfc9ea 100644 --- a/test/reporter_output_test.cc +++ b/test/reporter_output_test.cc @@ -342,6 +342,33 @@ ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, {"^\"BM_SummaryRepeat/repeats:3_median\",%csv_report$"}, {"^\"BM_SummaryRepeat/repeats:3_stddev\",%csv_report$"}}); +// Test that non-aggregate data is not displayed. +// NOTE: this test is kinda bad. we are only testing the display output. +// But we don't check that the file output still contains everything... +void BM_SummaryDisplay(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_SummaryDisplay)->Repetitions(2)->DisplayAggregatesOnly(); +ADD_CASES(TC_ConsoleOut, + {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, + {"^BM_SummaryDisplay/repeats:2_mean %console_report$"}, + {"^BM_SummaryDisplay/repeats:2_median %console_report$"}, + {"^BM_SummaryDisplay/repeats:2_stddev %console_report$"}}); +ADD_CASES(TC_JSONOut, {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, + {"\"name\": \"BM_SummaryDisplay/repeats:2_mean\",$"}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"name\": \"BM_SummaryDisplay/repeats:2_median\",$"}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"name\": \"BM_SummaryDisplay/repeats:2_stddev\",$"}, + {"\"run_type\": \"aggregate\",$", MR_Next}}); +ADD_CASES(TC_CSVOut, + {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, + {"^\"BM_SummaryDisplay/repeats:2_mean\",%csv_report$"}, + {"^\"BM_SummaryDisplay/repeats:2_median\",%csv_report$"}, + {"^\"BM_SummaryDisplay/repeats:2_stddev\",%csv_report$"}}); + +// Test repeats with custom time unit. void BM_RepeatTimeUnit(benchmark::State& state) { for (auto _ : state) { }