From 4175f7a55bbb60f5344a7894ab1ffb0db202ce15 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 3 Jun 2021 16:54:06 +0300
Subject: [PATCH] [NFCI] Make BenchmarkRunner non-internal to its .cpp file

Currently the lifetime of a single BenchmarkRunner is constrained to a
single RunBenchmark() call, but that will have to change for interleaved
benchmark execution, because we'll need to keep the runner around so we
don't forget how many repetitions of an instance we have already done.
---
 src/benchmark_runner.cc | 387 +++++++++++++++++++---------------------
 src/benchmark_runner.h  |  45 +++++
 2 files changed, 226 insertions(+), 206 deletions(-)

diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc
index 4869aa7b27..55d6cf15d2 100644
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "benchmark_runner.h"
+
 #include "benchmark/benchmark.h"
 #include "benchmark_api_internal.h"
 #include "internal_macros.h"
@@ -106,7 +107,8 @@ BenchmarkReporter::Run CreateRunReport(
       report.max_bytes_used = memory_result.max_bytes_used;
     }
 
-    internal::Finish(&report.counters, results.iterations, seconds, b.threads());
+    internal::Finish(&report.counters, results.iterations, seconds,
+                     b.threads());
   }
   return report;
 }
@@ -137,238 +139,211 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
   manager->NotifyThreadComplete();
 }
 
-class BenchmarkRunner {
- public:
-  BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
-                  std::vector<BenchmarkReporter::Run>* complexity_reports_)
-      : b(b_),
-        complexity_reports(complexity_reports_),
-        min_time(!IsZero(b.min_time()) ? b.min_time()
-                                       : FLAGS_benchmark_min_time),
-        repeats(b.repetitions() != 0 ? b.repetitions()
-                                     : FLAGS_benchmark_repetitions),
-        has_explicit_iteration_count(b.iterations() != 0),
-        pool(b.threads() - 1),
-        iters(has_explicit_iteration_count ? b.iterations() : 1),
-        perf_counters_measurement(
-            PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
-        perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
-                                          ? &perf_counters_measurement
-                                          : nullptr) {
-    run_results.display_report_aggregates_only =
-        (FLAGS_benchmark_report_aggregates_only ||
-         FLAGS_benchmark_display_aggregates_only);
-    run_results.file_report_aggregates_only =
-        FLAGS_benchmark_report_aggregates_only;
-    if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
-      run_results.display_report_aggregates_only =
-          (b.aggregation_report_mode() &
-           internal::ARM_DisplayReportAggregatesOnly);
-      run_results.file_report_aggregates_only =
-          (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
-      CHECK(FLAGS_benchmark_perf_counters.empty() ||
-            perf_counters_measurement.IsValid())
-          << "Perf counters were requested but could not be set up.";
-    }
-
-    for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
-      DoOneRepetition(repetition_num);
-    }
-
-    // Calculate additional statistics
-    run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
-
-    // Maybe calculate complexity report
-    if (complexity_reports && b.last_benchmark_instance) {
-      auto additional_run_stats = ComputeBigO(*complexity_reports);
-      run_results.aggregates_only.insert(run_results.aggregates_only.end(),
-                                         additional_run_stats.begin(),
-                                         additional_run_stats.end());
-      complexity_reports->clear();
-    }
-  }
-
-  RunResults&& get_results() { return std::move(run_results); }
-
- private:
-  RunResults run_results;
-
-  const benchmark::internal::BenchmarkInstance& b;
-  std::vector<BenchmarkReporter::Run>* complexity_reports;
-
-  const double min_time;
-  const int repeats;
-  const bool has_explicit_iteration_count;
-
-  std::vector<std::thread> pool;
-
-  IterationCount iters;  // preserved between repetitions!
-  // So only the first repetition has to find/calculate it,
-  // the other repetitions will just use that precomputed iteration count.
-
-  PerfCountersMeasurement perf_counters_measurement;
-  PerfCountersMeasurement* const perf_counters_measurement_ptr;
-
-  struct IterationResults {
-    internal::ThreadManager::Result results;
-    IterationCount iters;
-    double seconds;
-  };
-  IterationResults DoNIterations() {
-    VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
-
-    std::unique_ptr<internal::ThreadManager> manager;
-    manager.reset(new internal::ThreadManager(b.threads()));
-
-    // Run all but one thread in separate threads
-    for (std::size_t ti = 0; ti < pool.size(); ++ti) {
-      pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
-                             manager.get(), perf_counters_measurement_ptr);
-    }
-    // And run one thread here directly.
-    // (If we were asked to run just one thread, we don't create new threads.)
-    // Yes, we need to do this here *after* we start the separate threads.
-    RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
-
-    // The main thread has finished. Now let's wait for the other threads.
-    manager->WaitForAllThreads();
-    for (std::thread& thread : pool) thread.join();
-
-    IterationResults i;
-    // Acquire the measurements/counters from the manager, UNDER THE LOCK!
-    {
-      MutexLock l(manager->GetBenchmarkMutex());
-      i.results = manager->results;
-    }
-
-    // And get rid of the manager.
-    manager.reset();
-
-    // Adjust real/manual time stats since they were reported per thread.
-    i.results.real_time_used /= b.threads();
-    i.results.manual_time_used /= b.threads();
-    // If we were measuring whole-process CPU usage, adjust the CPU time too.
-    if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
-
-    VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
-            << i.results.real_time_used << "\n";
-
-    // By using KeepRunningBatch a benchmark can iterate more times than
-    // requested, so take the iteration count from i.results.
-    i.iters = i.results.iterations / b.threads();
-
-    // Base decisions off of real time if requested by this benchmark.
-    i.seconds = i.results.cpu_time_used;
-    if (b.use_manual_time()) {
-      i.seconds = i.results.manual_time_used;
-    } else if (b.use_real_time()) {
-      i.seconds = i.results.real_time_used;
-    }
-
-    return i;
-  }
-
-  IterationCount PredictNumItersNeeded(const IterationResults& i) const {
-    // See how much iterations should be increased by.
-    // Note: Avoid division by zero with max(seconds, 1ns).
-    double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
-    // If our last run was at least 10% of FLAGS_benchmark_min_time then we
-    // use the multiplier directly.
-    // Otherwise we use at most 10 times expansion.
-    // NOTE: When the last run was at least 10% of the min time the max
-    // expansion should be 14x.
-    bool is_significant = (i.seconds / min_time) > 0.1;
-    multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
-    if (multiplier <= 1.0) multiplier = 2.0;
-
-    // So what seems to be the sufficiently-large iteration count? Round up.
-    const IterationCount max_next_iters = static_cast<IterationCount>(
-        std::lround(std::max(multiplier * static_cast<double>(i.iters),
-                             static_cast<double>(i.iters) + 1.0)));
-    // But we do have *some* sanity limits though..
-    const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
-
-    VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
-    return next_iters;  // round up before conversion to integer.
-  }
-
-  bool ShouldReportIterationResults(const IterationResults& i) const {
-    // Determine if this run should be reported;
-    // Either it has run for a sufficient amount of time
-    // or because an error was reported.
-    return i.results.has_error_ ||
-           i.iters >= kMaxIterations ||  // Too many iterations already.
-           i.seconds >= min_time ||      // The elapsed time is large enough.
-           // CPU time is specified but the elapsed real time greatly exceeds
-           // the minimum time.
-           // Note that user provided timers are except from this sanity check.
-           ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
-  }
-
-  void DoOneRepetition(int64_t repetition_index) {
-    const bool is_the_first_repetition = repetition_index == 0;
-    IterationResults i;
-
-    // We *may* be gradually increasing the length (iteration count)
-    // of the benchmark until we decide the results are significant.
-    // And once we do, we report those last results and exit.
-    // Please do note that the if there are repetitions, the iteration count
-    // is *only* calculated for the *first* repetition, and other repetitions
-    // simply use that precomputed iteration count.
-    for (;;) {
-      i = DoNIterations();
-
-      // Do we consider the results to be significant?
-      // If we are doing repetitions, and the first repetition was already done,
-      // it has calculated the correct iteration time, so we have run that very
-      // iteration count just now. No need to calculate anything. Just report.
-      // Else, the normal rules apply.
-      const bool results_are_significant = !is_the_first_repetition ||
-                                           has_explicit_iteration_count ||
-                                           ShouldReportIterationResults(i);
-
-      if (results_are_significant) break;  // Good, let's report them!
-
-      // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
-      // iteration count, and run the benchmark again...
-
-      iters = PredictNumItersNeeded(i);
-      assert(iters > i.iters &&
-             "if we did more iterations than we want to do the next time, "
-             "then we should have accepted the current iteration run.");
-    }
-
-    // Oh, one last thing, we need to also produce the 'memory measurements'..
-    MemoryManager::Result memory_result;
-    IterationCount memory_iterations = 0;
-    if (memory_manager != nullptr) {
-      // Only run a few iterations to reduce the impact of one-time
-      // allocations in benchmarks that are not properly managed.
-      memory_iterations = std::min<IterationCount>(16, iters);
-      memory_manager->Start();
-      std::unique_ptr<internal::ThreadManager> manager;
-      manager.reset(new internal::ThreadManager(1));
-      RunInThread(&b, memory_iterations, 0, manager.get(),
-                  perf_counters_measurement_ptr);
-      manager->WaitForAllThreads();
-      manager.reset();
-
-      memory_manager->Stop(&memory_result);
-    }
-
-    // Ok, now actualy report.
-    BenchmarkReporter::Run report =
-        CreateRunReport(b, i.results, memory_iterations, memory_result,
-                        i.seconds, repetition_index, repeats);
-
-    if (complexity_reports && !report.error_occurred)
-      complexity_reports->push_back(report);
-
-    run_results.non_aggregates.push_back(report);
-  }
-};
-
-} // end namespace
+} // end namespace
+
+BenchmarkRunner::BenchmarkRunner(
+    const benchmark::internal::BenchmarkInstance& b_,
+    std::vector<BenchmarkReporter::Run>* complexity_reports_)
+    : b(b_),
+      complexity_reports(complexity_reports_),
+      min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
+      repeats(b.repetitions() != 0 ? b.repetitions()
+                                   : FLAGS_benchmark_repetitions),
+      has_explicit_iteration_count(b.iterations() != 0),
+      pool(b.threads() - 1),
+      iters(has_explicit_iteration_count ? b.iterations() : 1),
+      perf_counters_measurement(
+          PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
+      perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
+                                        ? &perf_counters_measurement
+                                        : nullptr) {
+  run_results.display_report_aggregates_only =
+      (FLAGS_benchmark_report_aggregates_only ||
+       FLAGS_benchmark_display_aggregates_only);
+  run_results.file_report_aggregates_only =
+      FLAGS_benchmark_report_aggregates_only;
+  if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
+    run_results.display_report_aggregates_only =
+        (b.aggregation_report_mode() &
+         internal::ARM_DisplayReportAggregatesOnly);
+    run_results.file_report_aggregates_only =
+        (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
+    CHECK(FLAGS_benchmark_perf_counters.empty() ||
+          perf_counters_measurement.IsValid())
+        << "Perf counters were requested but could not be set up.";
+  }
+
+  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
+    DoOneRepetition(repetition_num);
+  }
+
+  // Calculate additional statistics
+  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
+
+  // Maybe calculate complexity report
+  if (complexity_reports && b.last_benchmark_instance) {
+    auto additional_run_stats = ComputeBigO(*complexity_reports);
+    run_results.aggregates_only.insert(run_results.aggregates_only.end(),
+                                       additional_run_stats.begin(),
+                                       additional_run_stats.end());
+    complexity_reports->clear();
+  }
+}
+
+BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
+  VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
+
+  std::unique_ptr<internal::ThreadManager> manager;
+  manager.reset(new internal::ThreadManager(b.threads()));
+
+  // Run all but one thread in separate threads
+  for (std::size_t ti = 0; ti < pool.size(); ++ti) {
+    pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
+                           manager.get(), perf_counters_measurement_ptr);
+  }
+  // And run one thread here directly.
+  // (If we were asked to run just one thread, we don't create new threads.)
+  // Yes, we need to do this here *after* we start the separate threads.
+  RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
+
+  // The main thread has finished. Now let's wait for the other threads.
+  manager->WaitForAllThreads();
+  for (std::thread& thread : pool) thread.join();
+
+  IterationResults i;
+  // Acquire the measurements/counters from the manager, UNDER THE LOCK!
+  {
+    MutexLock l(manager->GetBenchmarkMutex());
+    i.results = manager->results;
+  }
+
+  // And get rid of the manager.
+  manager.reset();
+
+  // Adjust real/manual time stats since they were reported per thread.
+  i.results.real_time_used /= b.threads();
+  i.results.manual_time_used /= b.threads();
+  // If we were measuring whole-process CPU usage, adjust the CPU time too.
+  if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
+
+  VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
+          << i.results.real_time_used << "\n";
+
+  // By using KeepRunningBatch a benchmark can iterate more times than
+  // requested, so take the iteration count from i.results.
+  i.iters = i.results.iterations / b.threads();
+
+  // Base decisions off of real time if requested by this benchmark.
+  i.seconds = i.results.cpu_time_used;
+  if (b.use_manual_time()) {
+    i.seconds = i.results.manual_time_used;
+  } else if (b.use_real_time()) {
+    i.seconds = i.results.real_time_used;
+  }
+
+  return i;
+}
+
+IterationCount BenchmarkRunner::PredictNumItersNeeded(
+    const IterationResults& i) const {
+  // See how much the iteration count should be increased by.
+  // Note: Avoid division by zero with max(seconds, 1ns).
+  double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
+  // If our last run was at least 10% of FLAGS_benchmark_min_time then we
+  // use the multiplier directly.
+  // Otherwise we use at most 10 times expansion.
+  // NOTE: When the last run was at least 10% of the min time the max
+  // expansion should be 14x.
+  bool is_significant = (i.seconds / min_time) > 0.1;
+  multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
+  if (multiplier <= 1.0) multiplier = 2.0;
+
+  // So what seems to be the sufficiently-large iteration count? Round up.
+  const IterationCount max_next_iters = static_cast<IterationCount>(
+      std::lround(std::max(multiplier * static_cast<double>(i.iters),
+                           static_cast<double>(i.iters) + 1.0)));
+  // But we do have *some* sanity limits, though.
+  const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
+
+  VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
+  return next_iters;  // round up before conversion to integer.
+}
+
+bool BenchmarkRunner::ShouldReportIterationResults(
+    const IterationResults& i) const {
+  // Determine if this run should be reported;
+  // either it has run for a sufficient amount of time,
+  // or an error was reported.
+  return i.results.has_error_ ||
+         i.iters >= kMaxIterations ||  // Too many iterations already.
+         i.seconds >= min_time ||      // The elapsed time is large enough.
+         // CPU time is specified but the elapsed real time greatly exceeds
+         // the minimum time.
+         // Note that user-provided timers are exempt from this sanity check.
+         ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
+}
+
+void BenchmarkRunner::DoOneRepetition(int64_t repetition_index) {
+  const bool is_the_first_repetition = repetition_index == 0;
+  IterationResults i;
+
+  // We *may* be gradually increasing the length (iteration count)
+  // of the benchmark until we decide the results are significant.
+  // And once we do, we report those last results and exit.
+  // Please do note that if there are repetitions, the iteration count
+  // is *only* calculated for the *first* repetition, and other repetitions
+  // simply use that precomputed iteration count.
+  for (;;) {
+    i = DoNIterations();
+
+    // Do we consider the results to be significant?
+    // If we are doing repetitions, and the first repetition was already done,
+    // it has calculated the correct iteration time, so we have run that very
+    // iteration count just now. No need to calculate anything. Just report.
+    // Else, the normal rules apply.
+    const bool results_are_significant = !is_the_first_repetition ||
+                                         has_explicit_iteration_count ||
+                                         ShouldReportIterationResults(i);
+
+    if (results_are_significant) break;  // Good, let's report them!
+
+    // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
+    // iteration count, and run the benchmark again...
+
+    iters = PredictNumItersNeeded(i);
+    assert(iters > i.iters &&
+           "if we did more iterations than we want to do the next time, "
+           "then we should have accepted the current iteration run.");
+  }
+
+  // Oh, one last thing, we need to also produce the 'memory measurements'..
+  MemoryManager::Result memory_result;
+  IterationCount memory_iterations = 0;
+  if (memory_manager != nullptr) {
+    // Only run a few iterations to reduce the impact of one-time
+    // allocations in benchmarks that are not properly managed.
+    memory_iterations = std::min<IterationCount>(16, iters);
+    memory_manager->Start();
+    std::unique_ptr<internal::ThreadManager> manager;
+    manager.reset(new internal::ThreadManager(1));
+    RunInThread(&b, memory_iterations, 0, manager.get(),
+                perf_counters_measurement_ptr);
+    manager->WaitForAllThreads();
+    manager.reset();
+
+    memory_manager->Stop(&memory_result);
+  }
+
+  // Ok, now actually report.
+  BenchmarkReporter::Run report =
+      CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
+                      repetition_index, repeats);
+
+  if (complexity_reports && !report.error_occurred)
+    complexity_reports->push_back(report);
+
+  run_results.non_aggregates.push_back(report);
+}
 
 RunResults RunBenchmark(
     const benchmark::internal::BenchmarkInstance& b,
diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h
index 9b0cf2a64e..9730ad386c 100644
--- a/src/benchmark_runner.h
+++ b/src/benchmark_runner.h
@@ -15,8 +15,13 @@
 #ifndef BENCHMARK_RUNNER_H_
 #define BENCHMARK_RUNNER_H_
 
+#include <thread>
+#include <vector>
+
 #include "benchmark_api_internal.h"
 #include "internal_macros.h"
+#include "perf_counters.h"
+#include "thread_manager.h"
 
 DECLARE_double(benchmark_min_time);
 
@@ -42,6 +47,46 @@ struct RunResults {
   bool file_report_aggregates_only = false;
 };
 
+class BenchmarkRunner {
+ public:
+  BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
+                  std::vector<BenchmarkReporter::Run>* complexity_reports_);
+
+  RunResults&& get_results() { return std::move(run_results); }
+
+ private:
+  RunResults run_results;
+
+  const benchmark::internal::BenchmarkInstance& b;
+  std::vector<BenchmarkReporter::Run>* complexity_reports;
+
+  const double min_time;
+  const int repeats;
+  const bool has_explicit_iteration_count;
+
+  std::vector<std::thread> pool;
+
+  IterationCount iters;  // preserved between repetitions!
+  // So only the first repetition has to find/calculate it,
+  // the other repetitions will just use that precomputed iteration count.
+
+  PerfCountersMeasurement perf_counters_measurement;
+  PerfCountersMeasurement* const perf_counters_measurement_ptr;
+
+  struct IterationResults {
+    internal::ThreadManager::Result results;
+    IterationCount iters;
+    double seconds;
+  };
+  IterationResults DoNIterations();
+
+  IterationCount PredictNumItersNeeded(const IterationResults& i) const;
+
+  bool ShouldReportIterationResults(const IterationResults& i) const;
+
+  void DoOneRepetition(int64_t repetition_index);
+};
+
 RunResults RunBenchmark(
     const benchmark::internal::BenchmarkInstance& b,
     std::vector<BenchmarkReporter::Run>* complexity_reports);
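
Note for reviewers: the RunBenchmark() body itself falls outside the excerpt
above, but with this patch it stays a thin wrapper around the class; the
runner still executes every repetition inside its constructor, and the
results are moved out exactly once. A minimal sketch of that call pattern
(inside namespace benchmark::internal; names match the header above):

    // One-shot usage, as of this patch. The interleaved-execution driver
    // mentioned in the commit message does not exist yet; it would own
    // several BenchmarkRunner objects instead of one local temporary.
    RunResults RunBenchmark(
        const benchmark::internal::BenchmarkInstance& b,
        std::vector<BenchmarkReporter::Run>* complexity_reports) {
      BenchmarkRunner r(b, complexity_reports);  // all repetitions run here
      return r.get_results();                    // moves RunResults out
    }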
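
Note for reviewers: DoNIterations() uses the usual "run N-1 workers in the
pool, run the Nth worker on the calling thread" pattern, so a single-threaded
benchmark spawns no threads at all. A self-contained sketch of just that
pattern, with a toy Work() standing in for RunInThread():

    #include <thread>
    #include <vector>

    void Work(int thread_index) {
      (void)thread_index;  // toy stand-in for RunInThread()
    }

    void RunOnNThreads(int n) {  // n >= 1, like b.threads()
      std::vector<std::thread> pool(n - 1);  // like BenchmarkRunner::pool
      for (std::size_t ti = 0; ti < pool.size(); ++ti)
        pool[ti] = std::thread(Work, static_cast<int>(ti + 1));
      Work(0);  // the calling thread participates instead of idling
      for (std::thread& t : pool) t.join();
    }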
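
Note for reviewers: the PredictNumItersNeeded() heuristic is easier to check
with concrete numbers. A standalone re-implementation for illustration only
(Predict() is not a library name, and the kMaxIterations clamp is omitted):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>

    int64_t Predict(double min_time, double seconds, int64_t iters) {
      // Aim 40% past min_time so the next run likely crosses the threshold.
      double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
      // Distrust very short runs: unless the last run covered at least 10%
      // of min_time, cap the growth at 10x per step.
      const bool is_significant = (seconds / min_time) > 0.1;
      if (!is_significant) multiplier = std::min(10.0, multiplier);
      if (multiplier <= 1.0) multiplier = 2.0;
      // Grow by at least one iteration, and round to an integer count.
      return static_cast<int64_t>(std::lround(
          std::max(multiplier * static_cast<double>(iters),
                   static_cast<double>(iters) + 1.0)));
    }

    int main() {
      // 1 iteration took 1us against a 0.5s budget: not yet significant,
      // so growth is clamped to 10x -> 10, then 100, 1000, ... iterations.
      std::cout << Predict(0.5, 1e-6, 1) << "\n";       // prints 10
      // 700000 iterations took 0.25s (half the budget): significant, so
      // the multiplier 0.5 * 1.4 / 0.25 = 2.8 is used directly.
      std::cout << Predict(0.5, 0.25, 700000) << "\n";  // prints 1960000
    }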