From df5524374b11bf84cc3ae02bd8be9d9f67282f85 Mon Sep 17 00:00:00 2001 From: Doug Evans Date: Mon, 1 Jul 2024 13:29:59 -0700 Subject: [PATCH] [FR] Add API to provide custom profilers #1807 This API is akin to the MemoryManager API and lets tools provide their own profiler which is wrapped in the same way MemoryManager is wrapped. Namely, the profiler provides Start/Stop methods that are called at the start/end of running the benchmark in a separate pass. --- CONTRIBUTORS | 1 + docs/user_guide.md | 15 ++++++++ include/benchmark/benchmark.h | 20 +++++++++++ src/benchmark.cc | 4 +++ src/benchmark_runner.cc | 65 +++++++++++++++++++++++++---------- src/benchmark_runner.h | 5 +++ test/CMakeLists.txt | 3 ++ test/profiler_manager_test.cc | 43 +++++++++++++++++++++++ 8 files changed, 137 insertions(+), 19 deletions(-) create mode 100644 test/profiler_manager_test.cc diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 9ca2caa3ee..54aba7b56d 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -42,6 +42,7 @@ Dominic Hamon Dominik Czarnota Dominik Korman Donald Aingworth +Doug Evans Eric Backus Eric Fiselier Eugene Zhuk diff --git a/docs/user_guide.md b/docs/user_guide.md index d22a906909..787e6eed12 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -1134,6 +1134,21 @@ a report on the number of allocations, bytes used, etc. This data will then be reported alongside other performance data, currently only when using JSON output. + + +## Profiling + +It's often useful to also profile benchmarks in particular ways, in addition to +CPU performance. For this reason, benchmark offers the `RegisterProfilerManager` +method that allows a custom `ProfilerManager` to be injected. + +If set, the `ProfilerManager::AfterSetupStart` and +`ProfilerManager::BeforeTeardownStop` methods will be called at the start and +end of a separate benchmark run to allow user code to collect and report +user-provided profile metrics. + +Output collected from this profiling run must be reported separately. + ## Using RegisterBenchmark(name, fn, args...) diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 08cfe29da3..7dd72e27bc 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -416,6 +416,26 @@ class MemoryManager { BENCHMARK_EXPORT void RegisterMemoryManager(MemoryManager* memory_manager); +// If a ProfilerManager is registered (via RegisterProfilerManager()), the +// benchmark will be run an additional time under the profiler to collect and +// report profile metrics for the run of the benchmark. +class ProfilerManager { + public: + virtual ~ProfilerManager() {} + + // This is called after `Setup()` code and right before the benchmark is run. + virtual void AfterSetupStart() = 0; + + // This is called before `Teardown()` code and right after the benchmark + // completes. + virtual void BeforeTeardownStop() = 0; +}; + +// Register a ProfilerManager instance that will be used to collect and report +// profile measurements for benchmark runs. +BENCHMARK_EXPORT +void RegisterProfilerManager(ProfilerManager* profiler_manager); + // Add a key-value pair to output as part of the context stanza in the report. BENCHMARK_EXPORT void AddCustomContext(const std::string& key, const std::string& value); diff --git a/src/benchmark.cc b/src/benchmark.cc index 337bb3faa7..374c5141c9 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -656,6 +656,10 @@ void RegisterMemoryManager(MemoryManager* manager) { internal::memory_manager = manager; } +void RegisterProfilerManager(ProfilerManager* manager) { + internal::profiler_manager = manager; +} + void AddCustomContext(const std::string& key, const std::string& value) { if (internal::global_context == nullptr) { internal::global_context = new std::map(); diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index a74bdadd3e..e597966d70 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -62,6 +62,8 @@ namespace internal { MemoryManager* memory_manager = nullptr; +ProfilerManager* profiler_manager = nullptr; + namespace { static constexpr IterationCount kMaxIterations = 1000000000000; @@ -401,6 +403,44 @@ void BenchmarkRunner::RunWarmUp() { } } +MemoryManager::Result* BenchmarkRunner::RunMemoryManager( + IterationCount& memory_iterations) { + // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an + // optional so we don't have to own the Result here. + // Can't do it now due to cxx03. + memory_results.push_back(MemoryManager::Result()); + MemoryManager::Result* memory_result = &memory_results.back(); + // Only run a few iterations to reduce the impact of one-time + // allocations in benchmarks that are not properly managed. + memory_iterations = std::min(16, iters); + memory_manager->Start(); + std::unique_ptr manager; + manager.reset(new internal::ThreadManager(1)); + b.Setup(); + RunInThread(&b, memory_iterations, 0, manager.get(), + perf_counters_measurement_ptr); + manager->WaitForAllThreads(); + manager.reset(); + b.Teardown(); + memory_manager->Stop(*memory_result); + return memory_result; +} + +void BenchmarkRunner::RunProfilerManager() { + // TODO: Provide a way to specify the number of iterations. + IterationCount profile_iterations = 1; + std::unique_ptr manager; + manager.reset(new internal::ThreadManager(1)); + b.Setup(); + profiler_manager->AfterSetupStart(); + RunInThread(&b, profile_iterations, 0, manager.get(), + /*perf_counters_measurement_ptr=*/nullptr); + manager->WaitForAllThreads(); + profiler_manager->BeforeTeardownStop(); + manager.reset(); + b.Teardown(); +} + void BenchmarkRunner::DoOneRepetition() { assert(HasRepeatsRemaining() && "Already done all repetitions?"); @@ -445,28 +485,15 @@ void BenchmarkRunner::DoOneRepetition() { "then we should have accepted the current iteration run."); } - // Oh, one last thing, we need to also produce the 'memory measurements'.. + // Produce memory measurements if requested. MemoryManager::Result* memory_result = nullptr; IterationCount memory_iterations = 0; if (memory_manager != nullptr) { - // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an - // optional so we don't have to own the Result here. - // Can't do it now due to cxx03. - memory_results.push_back(MemoryManager::Result()); - memory_result = &memory_results.back(); - // Only run a few iterations to reduce the impact of one-time - // allocations in benchmarks that are not properly managed. - memory_iterations = std::min(16, iters); - memory_manager->Start(); - std::unique_ptr manager; - manager.reset(new internal::ThreadManager(1)); - b.Setup(); - RunInThread(&b, memory_iterations, 0, manager.get(), - perf_counters_measurement_ptr); - manager->WaitForAllThreads(); - manager.reset(); - b.Teardown(); - memory_manager->Stop(*memory_result); + memory_result = RunMemoryManager(memory_iterations); + } + + if (profiler_manager != nullptr) { + RunProfilerManager(); } // Ok, now actually report. diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index db2fa04396..2cfc961919 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -35,6 +35,7 @@ BM_DECLARE_string(benchmark_perf_counters); namespace internal { extern MemoryManager* memory_manager; +extern ProfilerManager* profiler_manager; struct RunResults { std::vector non_aggregates; @@ -113,6 +114,10 @@ class BenchmarkRunner { }; IterationResults DoNIterations(); + MemoryManager::Result* RunMemoryManager(IterationCount& memory_iterations); + + void RunProfilerManager(); + IterationCount PredictNumItersNeeded(const IterationResults& i) const; bool ShouldReportIterationResults(const IterationResults& i) const; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1de175f98d..815b581889 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -192,6 +192,9 @@ benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thous compile_output_test(memory_manager_test) benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s) +compile_output_test(profiler_manager_test) +benchmark_add_test(NAME profiler_manager_test COMMAND profiler_manager_test --benchmark_min_time=0.01s) + # MSVC does not allow to set the language standard to C++98/03. if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")) compile_benchmark_test(cxx03_test) diff --git a/test/profiler_manager_test.cc b/test/profiler_manager_test.cc new file mode 100644 index 0000000000..1b3e36c37f --- /dev/null +++ b/test/profiler_manager_test.cc @@ -0,0 +1,43 @@ +// FIXME: WIP + +#include + +#include "benchmark/benchmark.h" +#include "output_test.h" + +class TestProfilerManager : public benchmark::ProfilerManager { + void AfterSetupStart() override {} + void BeforeTeardownStop() override {} +}; + +void BM_empty(benchmark::State& state) { + for (auto _ : state) { + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); + } +} +BENCHMARK(BM_empty); + +ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_empty\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}}); + +int main(int argc, char* argv[]) { + std::unique_ptr pm(new TestProfilerManager()); + + benchmark::RegisterProfilerManager(pm.get()); + RunOutputTests(argc, argv); + benchmark::RegisterProfilerManager(nullptr); +}