diff --git a/.clangd b/.clangd
index 0a48ed4..935cc6a 100644
--- a/.clangd
+++ b/.clangd
@@ -2,3 +2,9 @@ CompileFlags:
   CompilationDatabase: build/
 Diagnostics:
   UnusedIncludes: Strict
+  ClangTidy:
+    Add:
+      - 'bugprone-*'
+      - 'readability-*'
+      - 'clang-analyzer-core.*'
+      - 'clang-analyzer-security.*'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 83cc713..0691a8d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,10 +3,6 @@ project(ctbench VERSION 1.1.1)
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
-if(${CTBENCH_ENABLE_CLANG_TIDY})
-  set(CMAKE_CXX_CLANG_TIDY clang-tidy -checks=-*,readability-*)
-endif()
-
 include(cmake/dependencies.cmake)
 include(cmake/ctbench-compile-opts.cmake)
diff --git a/CMakePresets.json b/CMakePresets.json
index 1b6d0e2..734a23a 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -16,8 +16,7 @@
         "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
         "CMAKE_BUILD_TYPE": "Debug",
         "CMAKE_CXX_COMPILER": "clang++",
-        "CMAKE_C_COMPILER": "clang",
-        "CMAKE_CXX_CLANG_TIDY": "clang-tidy;-checks=-*,readability-*"
+        "CMAKE_C_COMPILER": "clang"
       }
     },
     {
diff --git a/cmake/options.cmake b/cmake/options.cmake
index edb598e..f08f911 100644
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@@ -11,7 +11,3 @@ set(CTBENCH_ENABLE_TESTS
 set(CTBENCH_ENABLE_TRACY
     OFF
     CACHE BOOL "ctbench option: Enable Tracy profiler")
-
-set(CTBENCH_ENABLE_CLANG_TIDY
-    OFF
-    CACHE BOOL "ctbench option: Enable clang tidy")
diff --git a/docs/images/ExecuteCompiler.svg b/docs/images/ExecuteCompiler.svg
index d9e36b9..945430a 100644
--- a/docs/images/ExecuteCompiler.svg
+++ b/docs/images/ExecuteCompiler.svg
[Regenerated Gnuplot 5.4 SVG plot (patchlevel 3 -> 5). Legend "Timings",
X axis "Benchmark size factor" (ticks now 0-70, previously 1-8), Y axis
"Time (µs)" (ticks now 60000-85000, previously 1e6-1.1e7). Curves renamed from
bfbench-consecutiveloops-et/-flat and bfbench-imbricatedloops-et/-flat to
"variadicsum.expansion average" and "variadicsum.recursive average".
SVG path data omitted.]
diff --git a/docs/images/InstantiateFunction/foovoid.svg b/docs/images/InstantiateFunction/foovoid.svg
new file mode 100644
index 0000000..4ab0e3e
--- /dev/null
+++ b/docs/images/InstantiateFunction/foovoid.svg
[New Gnuplot 5.4 SVG plot. Legend "Timings", X axis "Benchmark size factor"
(0-70), Y axis "Time (µs)" (0-25000), curves "variadicsum.expansion average"
and "variadicsum.recursive average". SVG path data omitted.]
diff --git a/docs/images/Total_Backend.svg b/docs/images/Total_Backend.svg
new file mode 100644
index 0000000..86f929f
--- /dev/null
+++ b/docs/images/Total_Backend.svg
[New Gnuplot 5.4 SVG plot. Legend "Timings", X axis "Benchmark size factor"
(0-70), Y axis "Time (µs)" (1500-1850), curves "variadicsum.expansion average"
and "variadicsum.recursive average". SVG path data omitted.]
diff --git a/docs/images/Total_Frontend.svg b/docs/images/Total_Frontend.svg
new file mode 100644
index 0000000..7f186f2
--- /dev/null
+++ b/docs/images/Total_Frontend.svg
[New Gnuplot 5.4 SVG plot. Legend "Timings", X axis "Benchmark size factor"
(0-70), Y axis "Time (µs)" (55000-80000), curves "variadicsum.expansion
average" and "variadicsum.recursive average". SVG path data omitted.]
diff --git a/docs/images/Total_InstantiateFunction.svg b/docs/images/Total_InstantiateFunction.svg
index 3c9097a..78c778a 100644
--- a/docs/images/Total_InstantiateFunction.svg
+++ b/docs/images/Total_InstantiateFunction.svg
[Regenerated Gnuplot SVG plot. Legend "Timings", X axis "Benchmark size factor"
(ticks now 0-70, previously 1-10), Y axis "Time (µs)" (ticks now 0-25000,
previously 100000-350000). Curves renamed from the bfbench-* series to
"variadicsum.expansion average" and "variadicsum.recursive average".
SVG path data omitted.]
diff --git a/docs/images/perfetto-ui.png b/docs/images/perfetto-ui.png
new file mode 100644
index 0000000..a73fe8c
Binary files /dev/null and b/docs/images/perfetto-ui.png differ
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
new file mode 100644
index 0000000..a85d028
--- /dev/null
+++ b/example/CMakeLists.txt
@@ -0,0 +1,48 @@
+# Simple standalone project for compile-time benchmarking using ctbench
+
+# Usage:
+
+# ```sh
+# cmake --preset release
+# cmake --build --preset release
+# ```
+
+# CMake presets use clang/clang++ by default with time-trace enabled.
+
+cmake_minimum_required(VERSION 3.25)
+project(example-project)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+set(CMAKE_CXX_STANDARD 20)
+
+find_package(ctbench REQUIRED)
+
+add_compile_options(
+  -Wall
+  -Wextra
+  -Werror
+  -Wnull-dereference
+  -Wold-style-cast
+  -Wdouble-promotion
+  -Wshadow)
+
+if(ENABLE_TIME_TRACE)
+  add_compile_options(-ftime-trace -ftime-trace-granularity=1
+                      -fconstexpr-steps=2147483647 -fbracket-depth=2147483647)
+endif()
+
+set(BENCHMARK_START 0 CACHE STRING "Benchmark size minimum")
+set(BENCHMARK_STOP 64 CACHE STRING "Benchmark size maximum")
+set(BENCHMARK_STEP 1 CACHE STRING "Benchmark size step")
+set(BENCHMARK_ITERATIONS 10 CACHE STRING "Number of samples per size")
+
+ctbench_add_benchmark(
+  variadic_sum.expansion variadic_sum/expansion.cpp ${BENCHMARK_START}
+  ${BENCHMARK_STOP} ${BENCHMARK_STEP} ${BENCHMARK_ITERATIONS})
+
+ctbench_add_benchmark(
+  variadic_sum.recursive variadic_sum/recursive.cpp ${BENCHMARK_START}
+  ${BENCHMARK_STOP} ${BENCHMARK_STEP} ${BENCHMARK_ITERATIONS})
+
+ctbench_add_graph(variadic_sum-compare-graph compare-all.json
+                  variadic_sum.expansion variadic_sum.recursive)
diff --git a/example/CMakePresets.json b/example/CMakePresets.json
new file mode 100644
index 0000000..0632b03
--- /dev/null
+++ b/example/CMakePresets.json
@@ -0,0 +1,65 @@
+{
+  "version" : 4,
+  "cmakeMinimumRequired" : {
+    "major" : 3,
+    "minor" : 25,
+    "patch" : 0
+  },
+  "configurePresets" : [
+    {
+      "name" : "dev",
+      "displayName" : "Dev build",
+      "description" : "Provides a compile_commands.json file",
+      "generator" : "Ninja",
+      "binaryDir" : "${sourceDir}/build",
+      "cacheVariables" : {
+        "ENABLE_TIME_TRACE" : "ON",
+        "CMAKE_CXX_COMPILER" : "clang++",
+        "CMAKE_C_COMPILER" : "clang",
+        "CMAKE_EXPORT_COMPILE_COMMANDS" : "ON",
+        "CMAKE_BUILD_TYPE" : "RelWithDebInfo"
+      }
+    },
+    {
+      "name" : "release",
+      "displayName" : "Release",
+      "description" : "Release",
+      "generator" : "Ninja",
+      "binaryDir" : "${sourceDir}/build/release",
+      "cacheVariables" : {
+        "ENABLE_TIME_TRACE" : "ON",
+        "CMAKE_CXX_COMPILER" : "clang++",
+        "CMAKE_C_COMPILER" : "clang",
+        "CMAKE_EXPORT_COMPILE_COMMANDS" : "ON",
+        "CMAKE_BUILD_TYPE" : "RelWithDebInfo"
+      }
+    },
+    {
+      "name" : "debug",
"inherits" : "release", + "displayName" : "Debug", + "description" : "Debug", + "binaryDir" : "${sourceDir}/build/debug", + "cacheVariables" : { + "CMAKE_BUILD_TYPE" : "Debug" + } + } + ], + "buildPresets" : [ + { + "name" : "release", + "configurePreset" : "release", + "nativeToolOptions" : [ + "-j1" + ], + "targets" : [ + "ctbench-graph-all" + ] + }, + { + "name" : "debug", + "inherits" : "release", + "configurePreset" : "debug" + } + ] +} diff --git a/example/compare-all.json b/example/compare-all.json new file mode 100644 index 0000000..7cb5bd3 --- /dev/null +++ b/example/compare-all.json @@ -0,0 +1,14 @@ +{ + "plotter": "compare_by", + "legend_title": "Timings", + "x_label": "Benchmark size factor", + "y_label": "Time (µs)", + "draw_average": true, + "demangle": false, + "draw_points": false, + "width": 800, + "height": 400, + "key_ptrs": ["/name", "/args/detail"], + "value_ptr": "/dur", + "plot_file_extensions": [".svg"] +} diff --git a/example/variadic_sum/expansion.cpp b/example/variadic_sum/expansion.cpp new file mode 100644 index 0000000..a67b953 --- /dev/null +++ b/example/variadic_sum/expansion.cpp @@ -0,0 +1,27 @@ +#include + +/// Compile-time std::size_t +template struct ct_uint_t { + static constexpr std::size_t value = N; +}; + +/// Expansion compile-time sum implementation +template constexpr auto sum(); + +template <> constexpr auto sum() { return ct_uint_t<0>{}; } + +template constexpr auto sum(Ts const &...) { + return ct_uint_t<(Ts::value + ... + 0)>{}; +} + +// Driver code + +template constexpr auto foo() { + return [](std::index_sequence) { + return sum(ct_uint_t{}...); + } + (std::make_index_sequence{}); +} + +[[maybe_unused]] constexpr std::size_t result = + decltype(foo())::value; diff --git a/example/variadic_sum/recursive.cpp b/example/variadic_sum/recursive.cpp new file mode 100644 index 0000000..5fea438 --- /dev/null +++ b/example/variadic_sum/recursive.cpp @@ -0,0 +1,29 @@ +#include + +/// Compile-time std::size_t +template struct ct_uint_t { + static constexpr std::size_t value = N; +}; + +/// Recursive compile-time sum implementation +template constexpr auto sum(); + +template <> constexpr auto sum() { return ct_uint_t<0>{}; } +template constexpr auto sum(T const &) { return T{}; } + +template +constexpr auto sum(T const &, Ts const &...tl) { + return ct_uint_t{}; +} + +// Driver code + +template constexpr auto foo() { + return [](std::index_sequence) { + return sum(ct_uint_t{}...); + } + (std::make_index_sequence{}); +} + +[[maybe_unused]] constexpr std::size_t result = + decltype(foo())::value; diff --git a/grapher/include/grapher/core.hpp b/grapher/include/grapher/core.hpp index fa15477..f628d55 100644 --- a/grapher/include/grapher/core.hpp +++ b/grapher/include/grapher/core.hpp @@ -26,6 +26,9 @@ using multimap_t = /// Alias type for JSON objects. using json_t = nlohmann::basic_json; +/// Default type to represent scalar values from benchmark data. +using value_t = unsigned long; + // `time cmake --build --preset bench` results using different containers // (poacher/brainfuck project, pre-built benchmark targets): // - boost::container::flat_map -> 78.05 secs diff --git a/grapher/include/grapher/utils/json.hpp b/grapher/include/grapher/utils/json.hpp index 4a399ef..4808fa8 100644 --- a/grapher/include/grapher/utils/json.hpp +++ b/grapher/include/grapher/utils/json.hpp @@ -19,9 +19,10 @@ namespace grapher { /// For each instance in entry, returns the sum of the values pointed by /// value_jptr in the events matching the descriptor's predicates. 
-std::vector<double> get_values(benchmark_instance_t const &instance,
-                               std::vector<predicate_t> const &predicates,
-                               grapher::json_t::json_pointer value_jptr);
+std::vector<value_t>
+get_values(benchmark_instance_t const &instance,
+           std::vector<predicate_t> const &predicates,
+           grapher::json_t::json_pointer value_jptr);
 
 /// Generic conversion of a JSON field location to a std::string
 template <typename LocType>
@@ -172,7 +173,7 @@ grapher::json_t::array_t
 write_descriptors(std::vector<group_descriptor_t> const &descriptors);
 
 /// Reads a single descriptor.
-group_descriptor_t read_descriptor(grapher::json_t const &j);
+group_descriptor_t read_descriptor(grapher::json_t const &descriptor_json);
 
 /// Reads descriptors from a predicate list.
 std::vector<group_descriptor_t>
diff --git a/grapher/lib/grapher/plotters/compare.cpp b/grapher/lib/grapher/plotters/compare.cpp
index 55cf2e1..7f346e2 100644
--- a/grapher/lib/grapher/plotters/compare.cpp
+++ b/grapher/lib/grapher/plotters/compare.cpp
@@ -59,11 +59,11 @@ void plotter_compare_t::plot(benchmark_set_t const &bset,
     std::vector<predicate_t> predicates = get_predicates(descriptor);
 
     for (benchmark_case_t const &bench : bset) {
-      std::vector<double> x_points;
-      std::vector<double> y_points;
+      std::vector<grapher::value_t> x_points;
+      std::vector<grapher::value_t> y_points;
 
-      std::vector<double> x_average;
-      std::vector<double> y_average;
+      std::vector<grapher::value_t> x_average;
+      std::vector<grapher::value_t> y_average;
 
       for (benchmark_instance_t const &instance : bench.instances) {
         check(!instance.repetitions.empty(),
@@ -71,7 +71,7 @@ void plotter_compare_t::plot(benchmark_set_t const &bset,
                         bench.name, instance.size),
               error_level_t::warning_v);
 
-        std::vector<double> const values =
+        std::vector<grapher::value_t> const values =
             get_values(instance, predicates, value_json_pointer);
 
         check(!values.empty(),
@@ -82,7 +82,7 @@ void plotter_compare_t::plot(benchmark_set_t const &bset,
 
         // Drawing points
         if (draw_points) {
-          for (double value : values) {
+          for (grapher::value_t value : values) {
             x_points.push_back(instance.size);
             y_points.push_back(value);
           }
diff --git a/grapher/lib/grapher/plotters/compare_by.cpp b/grapher/lib/grapher/plotters/compare_by.cpp
index c8c3901..c9c234d 100644
--- a/grapher/lib/grapher/plotters/compare_by.cpp
+++ b/grapher/lib/grapher/plotters/compare_by.cpp
@@ -26,26 +26,62 @@ namespace grapher::plotters {
 
 // Plot-friendly data structures
 
+/// Value key type. Contains multiple values to group by a tuple of parameters
+using key_t = boost::container::small_vector<std::string, 4>;
+
 /// Point aggregate (multiple Y coordinates)
-using point_data_t = std::vector<double>;
+using point_data_t = std::vector<grapher::value_t>;
 
 /// Curve: X -> vec<Y>
-using benchmark_curve_t = map_t<std::size_t, point_data_t>;
+using benchmark_curve_t = grapher::map_t<std::size_t, point_data_t>;
 
 /// Benchmark name -> Curve
-using curve_aggregate_t = map_t<std::string, benchmark_curve_t>;
-
-/// Value key type. Contains multiple values to group by a tuple of parameters
-using key_t = boost::container::small_vector<std::string, 4>;
+using curve_aggregate_t = grapher::map_t<std::string, benchmark_curve_t>;
 
 /// Feature -> Benchmark aggregate
-using curve_aggregate_map_t = map_t<key_t, curve_aggregate_t>;
+using curve_aggregate_map_t = grapher::map_t<key_t, curve_aggregate_t>;
+
+struct process_event_parameters_t {
+  std::vector<json_t::json_pointer> const &key_pointers;
+  json_t::json_pointer const &value_pointer;
+  benchmark_case_t const &bench_case;
+  benchmark_instance_t const &instance;
+};
+
+/// Generates a curve for a given time-trace event and stores it in output_map.
+inline void process_event(curve_aggregate_map_t &output_map,
+                          grapher::json_t const &event,
+                          process_event_parameters_t const &parameters) {
+  // Building key from JSON pointers
+  key_t key;
+  for (json_t::json_pointer const &key_ptr : parameters.key_pointers) {
+    if (event.contains(key_ptr) && event[key_ptr].is_string()) {
+      key.push_back(event[key_ptr]);
+    }
+  }
 
-/// Wrangles data into a structure that's easier to work with for plotting.
+  // Key/value presence and type checks
+  if (check(event.contains(parameters.value_pointer),
+            fmt::format("No value at {}: {}",
+                        parameters.value_pointer.to_string(), event.dump()),
+            info_v) &&
+      check(event[parameters.value_pointer].is_number(),
+            fmt::format("Value at {} is not an integer: {}",
+                        parameters.value_pointer.to_string(), event.dump()),
+            info_v)) {
+    // Adding value
+    output_map[key][parameters.bench_case.name][parameters.instance.size]
+        .push_back(event[parameters.value_pointer]);
+  }
+}
+
+/// Scans event data at value_pointer and generates curves for each key
+/// generated from key_pointers. The curves are stored in a nested map
+/// structure.
 curve_aggregate_map_t
 get_bench_curves(benchmark_set_t const &bset,
-                 std::vector<json_t::json_pointer> const &key_ptrs,
-                 json_t::json_pointer const &val_ptr) {
+                 std::vector<json_t::json_pointer> const &key_pointers,
+                 json_t::json_pointer const &value_pointer) {
   ZoneScoped;
   namespace fs = std::filesystem;
@@ -62,27 +98,11 @@ get_bench_curves(benchmark_set_t const &bset,
         for (grapher::json_t const &event :
              get_as_ref<grapher::json_t::array_t const &>(repetition_json,
                                                           "traceEvents")) {
-
-          // Building key from JSON pointers
-          key_t key;
-          for (json_t::json_pointer const &key_ptr : key_ptrs) {
-            if (event.contains(key_ptr) && event[key_ptr].is_string()) {
-              key.push_back(event[key_ptr]);
-            }
-          }
-
-          // Key/value presence and type checks
-          if (check(event.contains(val_ptr),
-                    fmt::format("No value at {}: {}", val_ptr.to_string(),
-                                event.dump()),
-                    info_v) &&
-              check(event[val_ptr].is_number(),
-                    fmt::format("Value at {} is not an integer: {}",
-                                val_ptr.to_string(), event.dump()),
-                    info_v)) {
-            // Adding value
-            res[key][bench_case.name][instance.size].push_back(event[val_ptr]);
-          }
+          process_event(res, event,
+                        {.key_pointers = key_pointers,
+                         .value_pointer = value_pointer,
+                         .bench_case = bench_case,
+                         .instance = instance});
         }
       }
     }
@@ -97,15 +117,15 @@ std::string to_string(key_t const &key, bool demangle = true) {
     return "empty";
   }
 
-  std::string res = demangle ? llvm::demangle(key[0]) : key[0];
+  std::string result = demangle ? llvm::demangle(key[0]) : key[0];
 
   std::for_each(key.begin() + 1, key.end(), [&](std::string const &part) {
-    res += '/';
-    for (char const c : demangle ? llvm::demangle(part) : part) {
-      res += c == '/' ? '_' : c;
+    result += '/';
+    for (char const name_character : demangle ? llvm::demangle(part) : part) {
+      result += name_character == '/' ? '_' : name_character;
     }
   });
 
-  return res;
+  return result;
 }
 
 // =============================================================================
@@ -126,6 +146,76 @@ grapher::json_t plotter_compare_by_t::get_default_config() const {
   return res;
 }
 
+/// Parameter list for the generate_plot function,
+/// extracted into a struct for readability
+struct generate_plot_parameters_t {
+  std::filesystem::path const &plot_output_folder;
+  grapher::json_t const &plotter_config;
+  bool draw_average;
+  bool draw_points;
+  bool demangle;
+};
+
+/// Function to generate one plot.
+/// NB: This function must remain free of config reading logic.
+inline void generate_plot(
+    curve_aggregate_map_t::const_iterator::value_type aggregate_key_value,
+    generate_plot_parameters_t const &parameters) {
+  ZoneScoped; // Used for profiling with Tracy
+
+  auto const &[key, curve_aggregate] = aggregate_key_value;
+
+  // Plot init
+  sciplot::Plot2D plot;
+
+  for (auto const &[bench_name, benchmark_curve] : curve_aggregate) {
+    // Average curve coord vectors
+    std::vector<grapher::value_t> x_curve;
+    std::vector<grapher::value_t> y_curve;
+
+    // Point coord vectors
+    std::vector<grapher::value_t> x_points;
+    std::vector<grapher::value_t> y_points;
+
+    // Build point & curve vectors
+    for (auto const &[x_value, y_values] : benchmark_curve) {
+      // Building average curve vector
+      if (parameters.draw_average && !y_values.empty()) {
+        grapher::value_t const sum =
+            std::reduce(y_values.begin(), y_values.end());
+        grapher::value_t const average_point_y = sum / y_values.size();
+
+        x_curve.push_back(x_value);
+        y_curve.push_back(average_point_y);
+      }
+
+      // Building point vector
+      if (parameters.draw_points) {
+        for (grapher::value_t y_value : y_values) {
+          x_points.push_back(x_value);
+          y_points.push_back(y_value);
+        }
+      }
+    }
+
+    // Plot drawing
+
+    if (parameters.draw_average && !x_curve.empty()) {
+      // Draw average curve
+      plot.drawCurve(x_curve, y_curve).label(bench_name + " average");
+    }
+
+    if (parameters.draw_points && !x_points.empty()) {
+      // Draw points
+      plot.drawPoints(x_points, y_points).label(bench_name + " points");
+    }
+  }
+
+  save_plot(std::move(plot),
+            parameters.plot_output_folder / to_string(key, parameters.demangle),
+            parameters.plotter_config);
+}
+
 void plotter_compare_by_t::plot(benchmark_set_t const &bset,
                                 std::filesystem::path const &dest,
                                 grapher::json_t const &config) const {
@@ -145,8 +235,8 @@ void plotter_compare_by_t::plot(benchmark_set_t const &bset,
   std::vector<json_t::json_pointer> key_ptrs;
   std::transform(key_strs.begin(), key_strs.end(),
                  std::back_inserter(key_ptrs),
-                 [](std::string const &s) -> json_t::json_pointer {
-                   return json_t::json_pointer{s};
+                 [](std::string const &pointer) -> json_t::json_pointer {
+                   return json_t::json_pointer{pointer};
                  });
 
   // Wrangling
@@ -157,61 +247,14 @@ void plotter_compare_by_t::plot(benchmark_set_t const &bset,
   fs::create_directories(dest);
 
   // Drawing, ie. unwrapping the nested maps and drawing curves + saving plots
-
   std::for_each(
       std::execution::par_unseq, curve_aggregate_map.begin(),
-      curve_aggregate_map.end(), [&](auto const &kv) {
-        ZoneScoped;
-        auto const &[key, curve_aggregate] = kv;
-
-        // Plot init
-        sciplot::Plot2D plot;
-
-        for (auto const &[bench_name, benchmark_curve] : curve_aggregate) {
-          // Average curve coord vectors
-          std::vector<double> x_curve;
-          std::vector<double> y_curve;
-
-          // Point coord vectors
-          std::vector<double> x_points;
-          std::vector<double> y_points;
-
-          // Build point & curve vectors
-          for (auto const &[x, y_vec] : benchmark_curve) {
-            // Building average curve vector
-            if (draw_average && !y_vec.empty()) {
-              double const sum = std::reduce(y_vec.begin(), y_vec.end());
-              std::size_t const n = y_vec.size();
-
-              double const y = sum / n;
-
-              x_curve.push_back(x);
-              y_curve.push_back(y);
-            }
-
-            // Building point vector
-            if (draw_points) {
-              for (double y : y_vec) {
-                x_points.push_back(x);
-                y_points.push_back(y);
-              }
-            }
-          }
-
-          // Plot drawing
-
-          if (draw_average && !x_curve.empty()) {
-            // Draw average curve
-            plot.drawCurve(x_curve, y_curve).label(bench_name + " average");
-          }
-
-          if (draw_points && !x_points.empty()) {
-            // Draw points
-            plot.drawPoints(x_points, y_points).label(bench_name + " points");
-          }
-        }
-
-        save_plot(std::move(plot), dest / to_string(key, demangle), config);
+      curve_aggregate_map.end(), [&](auto const &aggregate_key_value) {
+        generate_plot(aggregate_key_value, {.plot_output_folder = dest,
+                                            .plotter_config = config,
+                                            .draw_average = draw_average,
+                                            .draw_points = draw_points,
+                                            .demangle = demangle});
       });
 }
diff --git a/grapher/lib/grapher/plotters/plotters.cpp b/grapher/lib/grapher/plotters/plotters.cpp
index 1ffc3ee..495dbaf 100644
--- a/grapher/lib/grapher/plotters/plotters.cpp
+++ b/grapher/lib/grapher/plotters/plotters.cpp
@@ -8,9 +8,9 @@ plotter_type_t string_to_plotter_type(std::string const &name) {
     return compare_v;
   }
 
-  if (auto const it = plotter_name_map.find(name);
-      it != plotter_name_map.end()) {
-    return it->second;
+  if (auto const name_iterator = plotter_name_map.find(name);
+      name_iterator != plotter_name_map.end()) {
+    return name_iterator->second;
   }
 
   return compare_v;
diff --git a/grapher/lib/grapher/plotters/stack.cpp b/grapher/lib/grapher/plotters/stack.cpp
index 2647c48..fc5f5a1 100644
--- a/grapher/lib/grapher/plotters/stack.cpp
+++ b/grapher/lib/grapher/plotters/stack.cpp
@@ -44,7 +44,7 @@ void plotter_stack_t::plot(benchmark_set_t const &bset,
   std::vector<sciplot::Plot2D> plots;
 
   // Storing max y value for normalization
-  double max_y_val = 0.;
+  grapher::value_t max_y_val = 0;
 
   /// Draws a stacked curve graph for a given benchmark
   auto draw_plot = [&](benchmark_case_t const &bench) -> sciplot::Plot2D {
@@ -52,15 +52,17 @@ void plotter_stack_t::plot(benchmark_set_t const &bset,
     apply_config(plot, config);
 
     // x axis
-    std::vector<double> x;
-    std::transform(
-        bench.instances.begin(), bench.instances.end(), std::back_inserter(x),
-        [](benchmark_instance_t const &i) -> double { return i.size; });
+    std::vector<grapher::value_t> x_axis;
+    std::transform(bench.instances.begin(), bench.instances.end(),
+                   std::back_inserter(x_axis),
+                   [](benchmark_instance_t const &element) -> grapher::value_t {
+                     return element.size;
+                   });
 
     // Low y axis
-    std::vector<double> y_low(x.size(), 0.);
+    std::vector<grapher::value_t> y_low(x_axis.size(), 0);
     // High y axis
-    std::vector<double> y_high(x.size());
+    std::vector<grapher::value_t> y_high(x_axis.size());
 
     for (group_descriptor_t const &descriptor : descriptors) {
       // Storing previous value as we iterate
@@ -70,7 +72,7 @@ void plotter_stack_t::plot(benchmark_set_t const &bset,
       for (std::size_t i = 0; i < bench.instances.size(); i++) {
         benchmark_instance_t const &instance = bench.instances[i];
-        std::vector<double> const values =
+        std::vector<grapher::value_t> const values =
             get_values(instance, predicates, feature_value_jptr);
 
         check(values.empty(),
@@ -79,7 +81,7 @@ void plotter_stack_t::plot(benchmark_set_t const &bset,
                         descriptor.name, bench.name, instance.size));
 
         // TODO: Get better stats (standard deviation, etc...)
-        double const y_val =
+        grapher::value_t const y_val =
             y_low[i] +
             std::reduce(values.begin(), values.end()) / values.size();
 
@@ -89,7 +91,7 @@ void plotter_stack_t::plot(benchmark_set_t const &bset,
         max_y_val = std::max(max_y_val, y_val);
       }
 
-      plot.drawCurvesFilled(x, y_low, y_high).label(std::move(curve_name));
+      plot.drawCurvesFilled(x_axis, y_low, y_high).label(std::move(curve_name));
 
       // Swapping
       std::swap(y_low, y_high);
@@ -106,7 +108,7 @@ void plotter_stack_t::plot(benchmark_set_t const &bset,
   // Normalize & save
   std::filesystem::create_directories(dest);
   for (std::size_t i = 0; i < bset.size(); i++) {
-    plots[i].yrange(0., max_y_val);
+    plots[i].yrange(0., double(max_y_val));
     save_plot(plots[i], dest / bset[i].name, config);
   }
 }
diff --git a/grapher/lib/grapher/predicates.cpp b/grapher/lib/grapher/predicates.cpp
index 5cb60e9..77dbdcf 100644
--- a/grapher/lib/grapher/predicates.cpp
+++ b/grapher/lib/grapher/predicates.cpp
@@ -87,34 +87,34 @@ inline auto match(grapher::json_t const &constraint) {
           regex_match_opt = constraint.value("regex", false)](
              grapher::json_t const &value) -> bool {
     auto items_instance_proxy = matcher_flat.items();
-    return std::all_of(
-        items_instance_proxy.begin(), items_instance_proxy.end(),
-        [&](auto const &matcher_item_kv) -> bool {
-          // Pointer to the value we should observe
-          grapher::json_t::json_pointer const ptr(matcher_item_kv.key());
-
-          // Checking for existence of matching value
-          if (!value.contains(ptr)) {
-            return false;
-          }
-
-          // Regex match
-          if (regex_match_opt) {
-            grapher::json_t const &val = value[ptr];
-
-            // Fallback to non regex if the value isn't a string
-            if (!val.is_string()) {
-              return val == matcher_item_kv.value();
-            }
-
-            return std::regex_match(
-                val.get_ref<grapher::json_t::string_t const &>(),
-                std::regex(matcher_item_kv.value()));
-          }
-
-          // Regular match
-          return value[ptr] == matcher_item_kv.value();
-        });
+    return std::all_of(items_instance_proxy.begin(),
+                       items_instance_proxy.end(),
+                       [&](auto const &matcher_item_kv) -> bool {
+                         // Pointer to the value we should observe
+                         grapher::json_t::json_pointer const ptr(
+                             matcher_item_kv.key());
+
+                         // Checking for existence of matching value
+                         if (!value.contains(ptr)) {
+                           return false;
+                         }
+
+                         // Regex match
+                         if (regex_match_opt) {
+                           grapher::json_t const &val = value[ptr];
+
+                           // Fallback to non regex if the value isn't a string
+                           if (!val.is_string()) {
+                             return val == matcher_item_kv.value();
+                           }
+
+                           return std::regex_match(
+                               val.get_ref<grapher::json_t::string_t const &>(),
+                               std::regex(matcher_item_kv.value()));
+                         }
+
+                         // Regular match
+                         return value[ptr] == matcher_item_kv.value();
+                       });
   };
 }
diff --git a/grapher/lib/grapher/utils/json.cpp b/grapher/lib/grapher/utils/json.cpp
index d8853ba..cf0f61e 100644
--- a/grapher/lib/grapher/utils/json.cpp
+++ b/grapher/lib/grapher/utils/json.cpp
@@ -59,16 +59,18 @@ grapher::json_t::array_t extract_group(group_descriptor_t const &descriptor,
   std::ranges::copy_if(
       events, std::back_inserter(res), [&](grapher::json_t const &event) {
         return std::ranges::all_of(
-            predicates, [&](predicate_t const &p) { return p(event); });
+            predicates,
+            [&](predicate_t const &predicate) { return predicate(event); });
       });
 
   return res;
 }
 
-group_descriptor_t read_descriptor(grapher::json_t const &j) {
-  return {.name = get_as_ref<json_t::string_t const &>(j, "name"),
-          .predicates =
-              get_as_ref<json_t::array_t const &>(j, "predicates")};
+group_descriptor_t read_descriptor(grapher::json_t const &descriptor_json) {
+  return {.name = get_as_ref<json_t::string_t const &>(descriptor_json,
+                                                       "name"),
+          .predicates = get_as_ref<json_t::array_t const &>(
+              descriptor_json, "predicates")};
 }
 
 grapher::json_t group_descriptor_json(group_descriptor_t const &descriptor) {
@@ -98,27 +100,32 @@ read_descriptors(grapher::json_t::array_t const &list) {
   return res;
 }
 
-std::vector<double> get_values(benchmark_instance_t const &instance,
-                               std::vector<predicate_t> const &predicates,
-                               grapher::json_t::json_pointer value_jptr) {
-  std::vector<double> res(instance.repetitions.size());
+std::vector<grapher::value_t>
+get_values(benchmark_instance_t const &instance,
+           std::vector<predicate_t> const &predicates,
+           grapher::json_t::json_pointer value_jptr) {
+  std::vector<grapher::value_t> res(instance.repetitions.size());
 
-  auto get_val = [&](std::filesystem::path const &repetition_path) -> double {
+  auto get_val =
+      [&](std::filesystem::path const &repetition_path) -> grapher::value_t {
     // Extract events
-    grapher::json_t j;
+    grapher::json_t repetition_data;
     {
      std::ifstream repetition_ifstream(repetition_path);
-      repetition_ifstream >> j;
+      repetition_ifstream >> repetition_data;
    }
 
     grapher::json_t::array_t const &events =
-        get_as_ref<grapher::json_t::array_t const &>(j, "traceEvents");
+        get_as_ref<grapher::json_t::array_t const &>(repetition_data,
+                                                     "traceEvents");
 
     // Accumulate
-    double val = 0.;
+    grapher::value_t val = 0;
     for (grapher::json_t const &event : events) {
       if (std::all_of(predicates.begin(), predicates.end(),
-                      [&](predicate_t const &p) -> bool { return p(event); })) {
+                      [&](predicate_t const &predicate) -> bool {
+                        return predicate(event);
+                      })) {
         val += get_as_ref<grapher::value_t const &>(event, value_jptr);
       }
     }
diff --git a/paper.bib b/paper.bib
index d1598a9..fe83360 100644
--- a/paper.bib
+++ b/paper.bib
@@ -1,20 +1,20 @@
 @misc{static-reflection,
-  author = "Daveed Vandevoorde and Wyatt Childers and Andrew Sutton and Faisal
-            Vali",
-  title = "{P1240R2}: Scalable Reflection",
-  howpublished = "\url{https://wg21.link/p1240r2}",
-  year = 2022,
-  month = 1,
-  publisher = "WG21",
+  author       = "Daveed Vandevoorde and Wyatt Childers and Andrew Sutton
+                  and Faisal Vali",
+  title        = "{P1240R2}: Scalable Reflection",
+  howpublished = "\url{https://wg21.link/p1240r2}",
+  year         = 2022,
+  month        = 1,
+  publisher    = "WG21",
 }
 
 @misc{constexpr-memory,
-  author = "Barry Revzin",
-  title = "{P2670R0}: Non-transient constexpr allocation",
-  howpublished = "\url{https://wg21.link/p2670r0}",
-  year = 2022,
-  month = 10,
-  publisher = "WG21"
+  author       = "Barry Revzin",
+  title        = "{P2670R0}: Non-transient constexpr allocation",
+  howpublished = "\url{https://wg21.link/p2670r0}",
+  year         = 2022,
+  month        = 10,
+  publisher    = "WG21",
 }
 
 @article{more-constexpr-containers,
@@ -28,12 +28,14 @@ @article{more-constexpr-containers
 @misc{metabench,
   title = {Metabench: A simple framework for compile-time microbenchmarks},
   author = {Dionne, Louis and Dutra, Bruno and Holmes, Odin and others},
+  year = {2017},
   url = {https://github.com/ldionne/metabench/},
 }
 
 @misc{poacher,
-  title = {ctbench: Compile-time benchmark and analysis},
+  title = {poacher: C++ compile-time compiling experiments},
   author = {Jules {Penuchot}},
+  year = {2020},
   url = {https://github.com/jpenuchot/poacher/},
 }
 
@@ -94,6 +96,14 @@ @online{ctbench-cppp21
   url = {https://www.youtube.com/watch?v=1RZY6skM0Rc},
 }
 
+@online{time-trace,
+  author = {Anton Afanasyev},
+  title = {Adds `-ftime-trace` option to clang that produces Chrome
+           `chrome://tracing` compatible JSON profiling output dumps},
+  year = {2019},
+  url = {https://reviews.llvm.org/D58675},
+}
+
 @online{meetingcpp22,
   author = {Paul {Keir}, Joel {Falcou}, Jules {Penuchot}, Andrew {Gozillon}},
   title = {Meeting C++ - A totally constexpr standard library},
diff --git a/paper.md b/paper.md
index a6774aa..83cebb3 100644
--- a/paper.md
+++ b/paper.md
@@ -2,17 +2,23 @@ title: 'ctbench - compile-time benchmarking and analysis'
 tags:
   - C++
-  - metaprogramming
+  - meta-programming
   - compilation
   - benchmarking
   - library
 authors:
   - name: Jules Penuchot
     orcid: 0000-0002-6377-6880
-    equal-contrib: true
+    equal-contrib: false
+    affiliation: 1
+  - name: Joel Falcou
+    orcid: 0000-0001-5380-7375
+    equal-contrib: false
     affiliation: 1
 affiliations:
-  - name: Jules Penuchot, LISN, Paris-Saclay University, France
+  - name: Université Paris-Saclay, CNRS,
+      Laboratoire Interdisciplinaire des Sciences du Numérique,
+      91400, Orsay, France
     index: 1
 date: 07 December 2023
 bibliography: paper.bib
@@ -20,118 +26,296 @@ bibliography: paper.bib
 
 # Summary
 
-With metaprogrammed libraries like Eigen[@eigen], Blaze[@blazelib], or
-CTRE[@ctre] being developed, we're seeing increasing computing needs at compile
-time. These compile-time computing needs might grow even further as C++ embeds
-more features over time to support and extend this kind of practices, like
-compile-time containers[@more-constexpr-containers] or static
-reflection[@static-reflection].
-
-That increase in compute needs raises the question on how to measure the impact
-of metaprogramming techniques on compile times. There are a lot of tools to run
-benchmarks for "runtime" programs, but as of today, only Metabench[@metabench]
-is capable of running compile-time benchmarks instantiated at several sizes to
-measure compile-time scaling of metaprogramming techniques. Another tool called
-Templight[@templight] has debugging and profiling capabilities for templates
-using Clang, although it only works as a "one-shot" profiler, which can't be
-used to study how metaprograms scale. Online compile-time benchmarking tool
-Build-Bench[@buildbench] is available too, but only allows simple A/B
-comparisons by measuring compiler execution time.
-
-Clang has a built-in profiler that provides in-depth time measurements of
-various compilation steps, which can be enabled by passing the `-ftime-trace`
-flag. Its output contains data that can be directly linked to symbols in the
-source code, making it easier to study the impact of specific symbols on various
-stages of compilation. The output format is a JSON file meant to be compatible
-with Chrome's flame graph visualizer, that contains a series of timed events
-with optional metadata like the (mangled) C++ symbol or the file related to an
-event.
+With libraries like Eigen[@eigen], Blaze[@blazelib], or CTRE[@ctre] being
+built around large template meta-programmed implementations, we're seeing
+increasing computing needs at compile time. These needs might grow even larger
+as C++ embeds more features over time to support and extend such practices,
+like compile-time containers[@more-constexpr-containers] or static
+reflection[@static-reflection]. However, there is still no clear-cut
+methodology to compare the performance impact of different meta-programming
+strategies.
+And as new C++ features enable new techniques with claimed compile-time
+performance benefits, no proper methodology is available to back up those
+claims.
+
+This paper introduces **ctbench**, a set of tools for compile-time
+benchmarking and analysis in C++. It aims to provide developer-friendly tools
+to declare and run benchmarks, then aggregate, filter, and plot the data for
+analysis. As such, **ctbench** is meant to become the first layer of a proper
+scientific methodology for analyzing compile-time program behavior.
+
+We'll first have a look at current tools for compile-time profiling and
+benchmarking and establish the limits of what these tools can do.
 
 # Statement of need
 
-Originally inspired by Metabench[@metabench], ctbench development was
+C++ template meta-programming has drawn interest because it allows computing
+libraries to offer high performance together with a very high level of
+abstraction: instead of interpreting representations of calculations at
+runtime, these representations are processed at compile time and turned
+directly into specialized programs.
+
+As meta-programming became easier with C++11 and C++17, it became more
+mainstream; consequently, developers have to bear with longer compilation
+times, often without being able to explain them. Measuring compilation times
+is therefore increasingly important, and so is explaining them. A first
+generation of tools tackles this issue, each with its own specific
+methodology:
+
+- Buildbench[@buildbench] measures compiler execution times for basic
+  A-B compile-time comparisons in a web browser,
+- Metabench[@metabench] instantiates variably sized benchmarks using embedded
+  Ruby (ERB) templating and plots compiler execution time, allowing scaling
+  analyses of meta-programs,
+- Templight[@templight] adds Clang template instantiation inspection
+  capabilities with debugging and profiling tools.
+
+Additionally, Clang has a built-in profiler[@time-trace] that provides
+in-depth time measurements of various compilation steps, which can be enabled
+by passing the `-ftime-trace` flag. Its output contains data that can be
+directly linked to symbols in the source code, making it easier to study the
+impact of specific symbols on the various stages of compilation. The output
+format is a JSON file meant to be compatible with Chrome's flame graph
+visualizer, which contains a series of timed events with optional metadata
+like the mangled C++ symbol or the file related to an event. The profiling
+data can then be visualized using tools such as Google's
+[Perfetto UI](https://ui.perfetto.dev/).
+
+![Perfetto UI displaying a sample Clang time trace file](docs/images/perfetto-ui.png)
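+
+As a minimal sketch of this format (the field values below are illustrative,
+and real traces contain many more events and fields), a time-trace file boils
+down to a list of timed events, each with a name, a duration in microseconds,
+and optional detail metadata:
+
+```json
+{
+  "traceEvents": [
+    {
+      "ph": "X",
+      "ts": 1042,
+      "dur": 2500,
+      "name": "InstantiateFunction",
+      "args": { "detail": "foo<void>()" }
+    }
+  ]
+}
+```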
+
+Clang's profiler data is very exhaustive and insightful; however, there is no
+tooling to make sense of it in the context of variable-size compile-time
+benchmarks. **ctbench** tries to bridge the gap by providing a tool to analyze
+this valuable data. It also improves upon existing tools by providing a
+solution that is easy to integrate into existing CMake projects, and generates
+graphs in various formats that are trivially embeddable in documents like
+research papers, web pages, or documentation. Additionally, relying on
+persistent configuration, benchmark declaration, and description files
+provides strong guarantees for benchmark reproducibility, as opposed to web
+tools or interactive profilers.
+
+# Functionality
+
+Originally inspired by Metabench[@metabench], **ctbench** development was
 driven by the need for a similar tool that allows the observation of Clang's
 time-trace files to help get a more comprehensive view on the impact of
-metaprogramming techniques on compile times.
-
-A strong emphasis was put on developer friendliness, project integration, and
-component reusability. ctbench provides a well documented CMake API for
-benchmark declaration, allows benchmark generation using the C++ pre-processor,
-and its C++ core can be used as a shared C++ library as well.
-
-The core library provides data representations to handle benchmarks cases
-instantited at several sizes, each instance being repeated at least once. It
-also provides tools to aggregate, filter, and sort data from time-trace events,
-as well as various plotters that provide different aggregation and vizualisation
-strategies. The plotters can generate files in various format thanks to the
-Sciplot[@sciplot] library, and they are highly configurable through JSON
-configuration files that are well documented. Default configuration files can be
-generated using a dedicated CLI tool.
-
-Even though ctbench was made to analyze Clang's time-trace events, it can also
-measure compiler execution time and report it in a synthetic time-trace file,
-making it partially compatible with GCC as well.
-
-All these features make ctbench a very complete toolkit for compile-time
+meta-programming techniques on compile times. A strong emphasis was put on
+developer friendliness, project integration, and component reusability.
+
+**ctbench** provides:
+
+- a well-documented CMake API for benchmark declaration, with benchmarks that
+  can be generated using the C++ pre-processor.
+
+  Although CMake is not a proper programming language, it is used as the main
+  API for **ctbench**, as most C++ developers are already familiar with it.
+
+- a set of JSON-configurable plotters with customizable data aggregation
+  features, which can be reused as a C++ library.
+
+  The core library provides data representations to handle benchmark cases
+  instantiated at several sizes, each instance being repeated at least once.
+  It also provides tools to aggregate, filter, and sort data from time-trace
+  events, as well as various plotters that implement different aggregation and
+  visualisation strategies. The plotters can generate files in various formats
+  thanks to the Sciplot[@sciplot] library, and they are highly configurable
+  through JSON configuration files that are well documented. Default
+  configuration files can be generated using a dedicated CLI tool.
+
+Although **ctbench** was made to handle Clang's time-trace events, it also has
+a compatibility mode for compilers that do not support them, such as GCC. This
+mode works by measuring compiler execution time just like
+Metabench[@metabench] and generating a synthetic time-trace file that contains
+the compiler execution time, as sketched below. Moreover, the tooling makes
+defining compilers per-target possible within a CMake project, allowing
+black-box compiler performance comparisons, for example between GCC and Clang,
+or between different versions of a compiler.
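+
+Assuming only total compiler execution time is measured, the synthetic
+time-trace file for one repetition could boil down to a single event such as
+the following sketch (the field layout shown here is an illustration under
+that assumption, not a normative description of **ctbench**'s output):
+
+```json
+{
+  "traceEvents": [
+    { "ph": "X", "ts": 0, "dur": 5210000, "name": "ExecuteCompiler" }
+  ]
+}
+```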
+
+All these features make **ctbench** a very complete toolkit for compile-time
 benchmarking, making comprehensive benchmarks quick and easy, and the only
 compile-time benchmarking tool that can gather Clang profiling data for
 scaling analysis.
+
+# Practical examples
+
+This section covers a short yet practical example of **ctbench** usage. We
+want to compute the sum of a series of integers known at compile time, using a
+type template to store unsigned integer values at compile time.
+
+We will be comparing the compile-time performance of two implementations:
+
+- one based on a recursive function template,
+- and one based on C++11 parameter pack expansion.
+
+First we need to include `utility` to instantiate our benchmark according to
+the size parameter using `std::make_index_sequence`, and to define the
+compile-time container type for an unsigned integer:
+
+```cpp
+#include <utility>
+
+/// Compile-time std::size_t
+template <std::size_t N> struct ct_uint_t {
+  static constexpr std::size_t value = N;
+};
+```
+
+The first version of the meta-program is based on a recursive function
+template:
+
+```cpp
+/// Recursive compile-time sum implementation
+template <typename... Ts> constexpr auto sum();
+
+template <> constexpr auto sum() { return ct_uint_t<0>{}; }
+template <typename T> constexpr auto sum(T const &) { return T{}; }
+
+template <typename T, typename... Ts>
+constexpr auto sum(T const &, Ts const &...tl) {
+  return ct_uint_t<T::value + decltype(sum(tl...))::value>{};
+}
+```
+
+And the other version relies on C++11 parameter pack expansion:
+
+```cpp
+/// Expansion compile-time sum implementation
+template <typename... Ts> constexpr auto sum();
+
+template <> constexpr auto sum() { return ct_uint_t<0>{}; }
+
+template <typename... Ts> constexpr auto sum(Ts const &...) {
+  return ct_uint_t<(Ts::value + ... + 0)>{};
+}
+```
+
+Both versions share the same interface, and thus the same driver code as well.
+The driver code takes care of scaling the benchmark according to
+`BENCHMARK_SIZE`, which is defined by **ctbench** through the CMake API:
+
+```cpp
+// Driver code
+
+template <typename = void> constexpr auto foo() {
+  return []<std::size_t... IndexPack>(std::index_sequence<IndexPack...>) {
+    return sum(ct_uint_t<IndexPack>{}...);
+  }
+  (std::make_index_sequence<BENCHMARK_SIZE>{});
+}
+
+[[maybe_unused]] constexpr std::size_t result =
+    decltype(foo())::value;
+```
+
+The CMake code needed to run the benchmarks is the following:
+
+```cmake
+ctbench_add_benchmark(
+  variadic_sum.expansion variadic_sum/expansion.cpp ${BENCHMARK_START}
+  ${BENCHMARK_STOP} ${BENCHMARK_STEP} ${BENCHMARK_ITERATIONS})
+
+ctbench_add_benchmark(
+  variadic_sum.recursive variadic_sum/recursive.cpp ${BENCHMARK_START}
+  ${BENCHMARK_STOP} ${BENCHMARK_STEP} ${BENCHMARK_ITERATIONS})
+```
+
+Then a graph target can be declared:
+
+```cmake
+ctbench_add_graph(variadic_sum-compare-graph compare-all.json
+                  variadic_sum.expansion variadic_sum.recursive)
+```
+
+with `compare-all.json` containing the following:
+
+```json
+{
+  "plotter": "compare_by",
+  "legend_title": "Timings",
+  "x_label": "Benchmark size factor",
+  "y_label": "Time (µs)",
+  "draw_average": true,
+  "demangle": false,
+  "draw_points": false,
+  "width": 800,
+  "height": 400,
+  "key_ptrs": ["/name", "/args/detail"],
+  "value_ptr": "/dur",
+  "plot_file_extensions": [".svg"]
+}
+```
+
+This configuration file uses the `compare_by` plotter to generate one plot for
+each pair of elements designated by the JSON pointers in `key_ptrs`, namely
+`/name` and `/args/detail`. The first pointer designates the LLVM timer name,
+and the second *may* refer to metadata such as a C++ symbol or a C++ source
+filename. The `demangle` option may be used to demangle C++ symbols using
+LLVM, as illustrated below.
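+
+For instance, a time-trace event shaped like the following sketch (field
+values are illustrative) would be grouped under the key built from `/name` and
+`/args/detail`, i.e. `InstantiateFunction/foo<void>()`, and its `/dur` values
+would land on a dedicated plot, saved under a sanitized name such as
+`InstantiateFunction/foovoid.svg`:
+
+```json
+{
+  "name": "InstantiateFunction",
+  "args": { "detail": "foo<void>()" },
+  "dur": 2500
+}
+```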
+
+The result is a series of graphs, each one designating a particular timer
+event, specific to a source or a symbol whenever possible (i.e. whenever
+metadata is present in the `/args/detail` value of a timer event). Each graph
+compares the evolution of these timer events as a function of the benchmark
+size.
+
+The following graphs were generated from the
+[**ctbench** example](https://github.com/JPenuchot/ctbench/tree/joss/example)
+on a Lenovo T470 with an Intel i5 6300U and 8GB of RAM. The compiler is Clang
+14.0.6, and [Pyperf](https://pyperf.readthedocs.io/en/latest/system.html) was
+used to turn off CPU frequency scaling.
+
+![ExecuteCompiler](docs/images/ExecuteCompiler.svg){width=100%}
+
+The first timer we want to look at is ExecuteCompiler, which covers the total
+compilation time. Starting from there, we can go down the timer event
+hierarchy to take a look at frontend and backend execution times.
+
+![Total Frontend](docs/images/Total_Frontend.svg){width=100%}
+
+![Total Backend](docs/images/Total_Backend.svg){width=100%}
+
+The backend is not impacted here, presumably because this is a purely
+compile-time program whose output program is empty. However, this might not be
+the case for all meta-programs, and meta-programs might have different impacts
+on the backend as they may generate programs in different ways (i.e. generate
+more symbols, larger symbols, more data structures, etc.).
+
+![Total InstantiateFunction](docs/images/Total_InstantiateFunction.svg){width=100%}
+
+The Total InstantiateFunction timer is an interesting one, as it explicitly
+targets function instantiation time. Note that timers prefixed with "Total"
+measure the total time spent in a timer section, regardless of the specific
+symbol or source associated with its individual timer events.
+
+![InstantiateFunction foovoid](docs/images/InstantiateFunction/foovoid.svg){width=100%}
+
+Finally, we can take a look at `InstantiateFunction/foovoid.svg`, which
+measures the InstantiateFunction event time specifically for `foo<void>()`,
+our driver template function. Using Perfetto UI to look at the timer event
+hierarchy, we can validate that the timer event for this specific symbol
+includes the InstantiateFunction time for all the symbols that may be
+instantiated within this function.
+
+This level of detail and granularity in the analysis of compile-time
+benchmarks had never been reached before, and may help us establish good
+practices to improve the compile-time performance of meta-programs.
+
 # Statement of interest
 
-ctbench was first presented at the CPPP 2021 conference[@ctbench-cppp21] which
-is the main C++ technical conference in France. It is being used to benchmark
-examples from the poacher[@poacher] project, which was briefly presented at the
-Meeting C++ 2022[@meetingcpp22] technical conference.
+**ctbench** was first presented at the CPPP 2021 conference[@ctbench-cppp21],
+which is the main C++ technical conference in France. It is being used to
+benchmark examples from the poacher[@poacher] project, which was briefly
+presented at the Meeting C++ 2022[@meetingcpp22] technical conference.
-# Practical examples
+# Related projects
+
+- [Poacher](https://github.com/jpenuchot/poacher): Experimental constexpr
+  parsing and code generation for the integration of arbitrary syntax DSLs in
+  C++20
+
-Poacher is a series of experimental projects meant to help us understanding what
-metaprogramming could be thanks to new C++ features such as non-transient
-constexpr memory allocation[@constexpr-memory]. It helped us getting hands-on
-experience on code generation using constexpr allocated memory, studying and
-overcoming the roadblocks, and evaluating the compile-time impact of the
-involved techniques.
-
-
-
-
-
+- [Rule of Cheese](https://github.com/jpenuchot/rule-of-cheese): A collection
+  of compile-time microbenchmarks to help set better C++ meta-programming
+  guidelines to improve compile-time performance
 
 # Acknowledgements
 
-We acknowledge contributions from Philippe Virouleau
+We acknowledge contributions from Philippe Virouleau and Paul Keir for their
+insightful suggestions.
 
 # References
diff --git a/ttw/ttw.cpp b/ttw/ttw.cpp
index ff171ac..f5995a0 100644
--- a/ttw/ttw.cpp
+++ b/ttw/ttw.cpp
@@ -32,7 +32,9 @@ inline int get_timetrace_file(std::filesystem::path const time_trace_file_dest,
   namespace fs = std::filesystem;
 
   // Run program and measure CPU time
-  rusage children_rusage_begin, children_rusage_end;
+  rusage children_rusage_begin;
+  rusage children_rusage_end;
+
   getrusage(RUSAGE_CHILDREN, &children_rusage_begin);
 
   // TODO: Bypass shell call and get return value
   int const ret = std::system(compile_command.c_str());
@@ -108,22 +110,23 @@ int main(int argc, char const *argv[]) {
 
   for (auto beg = &argv[args_start_id], end = &argv[argc]; beg < end; beg++) {
     // Current argument as a string_view
-    std::string_view current_arg{*beg};
+    std::string_view current_argument{*beg};
 
     // Handling -o flag
-    if (current_arg == std::string_view("-o") && beg + 1 != end) {
+    if (current_argument == std::string_view("-o") && beg + 1 != end) {
       obj_path = *(beg + 1);
     }
 
     // Handling Clang -ftime-trace flag
-    else if (current_arg == "-ftime-trace" || current_arg == "--ftime-trace") {
+    else if (current_argument == "-ftime-trace" ||
+             current_argument == "--ftime-trace") {
      has_time_trace_flag = true;
     }
 
     // Handling --override-compiler flag
-    else if (current_arg.starts_with(override_flag_prefix)) {
-      current_arg.remove_prefix(override_flag_prefix.size());
-      compiler_executable = current_arg;
+    else if (current_argument.starts_with(override_flag_prefix)) {
+      current_argument.remove_prefix(override_flag_prefix.size());
+      compiler_executable = current_argument;
 
       // Do not pass argument to the compiler
       continue;
@@ -135,7 +138,7 @@ int main(int argc, char const *argv[]) {
   std::string compile_command =
       std::move(compiler_executable) + args_builder.str();
 
-  if (std::getenv("CTBENCH_TTW_VERBOSE")) {
+  if (std::getenv("CTBENCH_TTW_VERBOSE") != nullptr) {
     std::cout << "[CTBENCH_TTW] Compile command: " << compile_command << '\n';
   }