diff --git a/docs/perf_counters.md b/docs/perf_counters.md index 74560e9669..527579a4ec 100644 --- a/docs/perf_counters.md +++ b/docs/perf_counters.md @@ -1,5 +1,3 @@ - - # User-Requested Performance Counters When running benchmarks, the user may choose to request collection of @@ -9,15 +7,31 @@ performance improvement matches expectations. This feature is available if: -* The benchmark is run on an architecture featuring a Performance Monitoring - Unit (PMU), -* The benchmark is compiled with support for collecting counters. Currently, - this requires [libpfm](http://perfmon2.sourceforge.net/) be available at build - time +* The benchmark is run on an architecture featuring a Performance Monitoring Unit (PMU), +* The benchmark is compiled with support for collecting counters. + The feature does not require modifying benchmark code. Counter collection is handled at the boundaries where timer collection is also handled. +The counter values are reported back through the [User Counters](../README.md#custom-counters) +mechanism, meaning they are available in all the formats (e.g. JSON) supported +by User Counters. + +## MacOS +MacOS, on Apple Silicon and Intel, has built-in support for per-thread instruction +and cycle counters. These counters can be queried through the semi-undocumented API +`thread_selfcounts` in libpthread. Benchmark support for these counters is always +enabled as it requires no additional dependencies. + +To use, pass a comma-separated list of counter names through the +`--benchmark_perf_counters` flag. The only available counter names +are `CYCLES` and `INSTRUCTIONS`. + +## Linux +Currently, this requires [libpfm](http://perfmon2.sourceforge.net/) be available +at build time. + To opt-in: * Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. @@ -29,6 +43,3 @@ they are platform specific, but some (e.g. 
`CYCLES` or `INSTRUCTIONS`) are mapped by libpfm to platform-specifics - see libpfm [documentation](http://perfmon2.sourceforge.net/docs.html) for more details. -The counter values are reported back through the [User Counters](../README.md#custom-counters) -mechanism, meaning, they are available in all the formats (e.g. JSON) supported -by User Counters. \ No newline at end of file diff --git a/src/perf_counters.cc b/src/perf_counters.cc index 582475f0ba..af7291e8cc 100644 --- a/src/perf_counters.cc +++ b/src/perf_counters.cc @@ -114,6 +114,50 @@ void PerfCounters::CloseCounters() const { close(fd); } } +#elif defined(BENCHMARK_OS_MACOSX) +const bool PerfCounters::kSupported = true; + +//nothing to initialize (could check if the thread_selfcounts syscall is available) +bool PerfCounters::Initialize() { return true; } + +//nothing to close +void PerfCounters::CloseCounters() const {} + +PerfCounters PerfCounters::Create( + const std::vector& counter_names) { + + if (counter_names.empty()) { + return NoCounters(); + } + + if (counter_names.size() > PerfCounterValues::kMaxCounters) { + GetErrorLogInstance() + << counter_names.size() + << " counters were requested. The minimum is 1, the maximum is " + << PerfCounterValues::kMaxCounters + << "\n"; + return NoCounters(); + } + + std::vector counter_ids(counter_names.size()); + + for (size_t i=0;i values_; const size_t nr_counters_; }; @@ -104,11 +121,25 @@ class BENCHMARK_EXPORT PerfCounters final { // names()[i]'s value is (*values)[i] BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { #ifndef BENCHMARK_OS_WINDOWS +#ifdef BENCHMARK_OS_MACOSX + //call the undocumented syscall wrapper function to get per thread instructions and cycles + //the OS maintains these counters across context switches/cpu migrations. 
+ uint64_t counts[2] = {}; //counts[0]=cycles, counts[1]=instructions + int res = thread_selfcounts(1, counts, sizeof(counts)); + + //copy the number of counters we have and reindex + uint64_t* buffer = (uint64_t*)values->get_data_buffer().first; + for (size_t i=0;iget_data_buffer(); auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second); return static_cast(read_bytes) == buffer.second; +#endif #else (void)values; return false;