From b68f449d014014bfea9dc473808612f1c2b89808 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Wed, 28 Dec 2022 14:56:06 +0100 Subject: [PATCH 01/43] Readability improvements: more explicit variable names, added a function to make compare_by.plot less bloated --- grapher/include/grapher/utils/json.hpp | 2 +- grapher/lib/grapher/plotters/compare_by.cpp | 141 +++++++++++--------- grapher/lib/grapher/plotters/plotters.cpp | 6 +- grapher/lib/grapher/plotters/stack.cpp | 16 ++- grapher/lib/grapher/predicates.cpp | 46 +++---- grapher/lib/grapher/utils/json.cpp | 19 +-- 6 files changed, 128 insertions(+), 102 deletions(-) diff --git a/grapher/include/grapher/utils/json.hpp b/grapher/include/grapher/utils/json.hpp index 4a399ef..9bd5b8e 100644 --- a/grapher/include/grapher/utils/json.hpp +++ b/grapher/include/grapher/utils/json.hpp @@ -172,7 +172,7 @@ grapher::json_t::array_t write_descriptors(std::vector const &descriptors); /// Reads a single descriptor. -group_descriptor_t read_descriptor(grapher::json_t const &j); +group_descriptor_t read_descriptor(grapher::json_t const &descriptor_json); /// Reads descriptors from a predicate list. std::vector diff --git a/grapher/lib/grapher/plotters/compare_by.cpp b/grapher/lib/grapher/plotters/compare_by.cpp index c8c3901..2629bfd 100644 --- a/grapher/lib/grapher/plotters/compare_by.cpp +++ b/grapher/lib/grapher/plotters/compare_by.cpp @@ -97,15 +97,15 @@ std::string to_string(key_t const &key, bool demangle = true) { return "empty"; } - std::string res = demangle ? llvm::demangle(key[0]) : key[0]; + std::string result = demangle ? llvm::demangle(key[0]) : key[0]; std::for_each(key.begin() + 1, key.end(), [&](std::string const &part) { - res += '/'; - for (char const c : demangle ? llvm::demangle(part) : part) { - res += c == '/' ? '_' : c; + result += '/'; + for (char const name_character : demangle ? llvm::demangle(part) : part) { + result += name_character == '/' ? 
'_' : name_character; } }); - return res; + return result; } // ============================================================================= @@ -126,6 +126,74 @@ grapher::json_t plotter_compare_by_t::get_default_config() const { return res; } +/// Parameter list for the generate_plot function +/// extracted into a struct for readability +struct generate_plot_params_t { + std::filesystem::path const &plot_output_folder; + grapher::json_t const &plotter_config; + bool draw_average; + bool draw_points; + bool demangle; +}; + +/// Function to generate one plot. +/// NB: This function must remain free of config reading logic. +inline void generate_plot( + curve_aggregate_map_t::const_iterator::value_type aggregate_key_value, + generate_plot_params_t const ¶meters) { + ZoneScoped; + auto const &[key, curve_aggregate] = aggregate_key_value; + + // Plot init + sciplot::Plot2D plot; + + for (auto const &[bench_name, benchmark_curve] : curve_aggregate) { + // Average curve coord vectors + std::vector x_curve; + std::vector y_curve; + + // Point coord vectors + std::vector x_points; + std::vector y_points; + + // Build point & curve vectors + for (auto const &[x_value, y_values] : benchmark_curve) { + // Building average curve vector + if (parameters.draw_average && !y_values.empty()) { + double const sum = std::reduce(y_values.begin(), y_values.end()); + double const average_point_y = sum / y_values.size(); + + x_curve.push_back(x_value); + y_curve.push_back(average_point_y); + } + + // Building point vector + if (parameters.draw_points) { + for (double y_value : y_values) { + x_points.push_back(x_value); + y_points.push_back(y_value); + } + } + } + + // Plot drawing + + if (parameters.draw_average && !x_curve.empty()) { + // Draw average curve + plot.drawCurve(x_curve, y_curve).label(bench_name + " average"); + } + + if (parameters.draw_points && !x_points.empty()) { + // Draw points + plot.drawPoints(x_points, y_points).label(bench_name + " points"); + } + } + + 
save_plot(std::move(plot), + parameters.plot_output_folder / to_string(key, parameters.demangle), + parameters.plotter_config); +} + void plotter_compare_by_t::plot(benchmark_set_t const &bset, std::filesystem::path const &dest, grapher::json_t const &config) const { @@ -145,8 +213,8 @@ void plotter_compare_by_t::plot(benchmark_set_t const &bset, std::vector key_ptrs; std::transform(key_strs.begin(), key_strs.end(), std::back_inserter(key_ptrs), - [](std::string const &s) -> json_t::json_pointer { - return json_t::json_pointer{s}; + [](std::string const &pointer) -> json_t::json_pointer { + return json_t::json_pointer{pointer}; }); // Wrangling @@ -157,61 +225,14 @@ void plotter_compare_by_t::plot(benchmark_set_t const &bset, fs::create_directories(dest); // Drawing, ie. unwrapping the nested maps and drawing curves + saving plots - std::for_each( std::execution::par_unseq, curve_aggregate_map.begin(), - curve_aggregate_map.end(), [&](auto const &kv) { - ZoneScoped; - auto const &[key, curve_aggregate] = kv; - - // Plot init - sciplot::Plot2D plot; - - for (auto const &[bench_name, benchmark_curve] : curve_aggregate) { - // Average curve coord vectors - std::vector x_curve; - std::vector y_curve; - - // Point coord vectors - std::vector x_points; - std::vector y_points; - - // Build point & curve vectors - for (auto const &[x, y_vec] : benchmark_curve) { - // Building average curve vector - if (draw_average && !y_vec.empty()) { - double const sum = std::reduce(y_vec.begin(), y_vec.end()); - std::size_t const n = y_vec.size(); - - double const y = sum / n; - - x_curve.push_back(x); - y_curve.push_back(y); - } - - // Building point vector - if (draw_points) { - for (double y : y_vec) { - x_points.push_back(x); - y_points.push_back(y); - } - } - } - - // Plot drawing - - if (draw_average && !x_curve.empty()) { - // Draw average curve - plot.drawCurve(x_curve, y_curve).label(bench_name + " average"); - } - - if (draw_points && !x_points.empty()) { - // Draw points - 
plot.drawPoints(x_points, y_points).label(bench_name + " points"); - } - } - - save_plot(std::move(plot), dest / to_string(key, demangle), config); + curve_aggregate_map.end(), [&](auto const &aggregate_key_value) { + generate_plot(aggregate_key_value, {.plot_output_folder = dest, + .plotter_config = config, + .draw_average = draw_average, + .draw_points = draw_points, + .demangle = demangle}); }); } diff --git a/grapher/lib/grapher/plotters/plotters.cpp b/grapher/lib/grapher/plotters/plotters.cpp index 1ffc3ee..495dbaf 100644 --- a/grapher/lib/grapher/plotters/plotters.cpp +++ b/grapher/lib/grapher/plotters/plotters.cpp @@ -8,9 +8,9 @@ plotter_type_t string_to_plotter_type(std::string const &name) { return compare_v; } - if (auto const it = plotter_name_map.find(name); - it != plotter_name_map.end()) { - return it->second; + if (auto const name_iterator = plotter_name_map.find(name); + name_iterator != plotter_name_map.end()) { + return name_iterator->second; } return compare_v; diff --git a/grapher/lib/grapher/plotters/stack.cpp b/grapher/lib/grapher/plotters/stack.cpp index 2647c48..49b0231 100644 --- a/grapher/lib/grapher/plotters/stack.cpp +++ b/grapher/lib/grapher/plotters/stack.cpp @@ -52,15 +52,17 @@ void plotter_stack_t::plot(benchmark_set_t const &bset, apply_config(plot, config); // x axis - std::vector x; - std::transform( - bench.instances.begin(), bench.instances.end(), std::back_inserter(x), - [](benchmark_instance_t const &i) -> double { return i.size; }); + std::vector x_axis; + std::transform(bench.instances.begin(), bench.instances.end(), + std::back_inserter(x_axis), + [](benchmark_instance_t const &element) -> double { + return element.size; + }); // Low y axis - std::vector y_low(x.size(), 0.); + std::vector y_low(x_axis.size(), 0.); // High y axis - std::vector y_high(x.size()); + std::vector y_high(x_axis.size()); for (group_descriptor_t const &descriptor : descriptors) { // Storing previous value as we iterate @@ -89,7 +91,7 @@ void 
plotter_stack_t::plot(benchmark_set_t const &bset, max_y_val = std::max(max_y_val, y_val); } - plot.drawCurvesFilled(x, y_low, y_high).label(std::move(curve_name)); + plot.drawCurvesFilled(x_axis, y_low, y_high).label(std::move(curve_name)); // Swapping std::swap(y_low, y_high); diff --git a/grapher/lib/grapher/predicates.cpp b/grapher/lib/grapher/predicates.cpp index 5cb60e9..77dbdcf 100644 --- a/grapher/lib/grapher/predicates.cpp +++ b/grapher/lib/grapher/predicates.cpp @@ -87,34 +87,34 @@ inline auto match(grapher::json_t const &constraint) { regex_match_opt = constraint.value("regex", false)]( grapher::json_t const &value) -> bool { auto items_instance_proxy = matcher_flat.items(); - return std::all_of( - items_instance_proxy.begin(), items_instance_proxy.end(), - [&](auto const &matcher_item_kv) -> bool { - // Pointer to the value we should observe - grapher::json_t::json_pointer const ptr(matcher_item_kv.key()); + return std::all_of(items_instance_proxy.begin(), items_instance_proxy.end(), + [&](auto const &matcher_item_kv) -> bool { + // Pointer to the value we should observe + grapher::json_t::json_pointer const ptr( + matcher_item_kv.key()); - // Checking for existence of matching value - if (!value.contains(ptr)) { - return false; - } + // Checking for existence of matching value + if (!value.contains(ptr)) { + return false; + } - // Regex match - if (regex_match_opt) { - grapher::json_t const &val = value[ptr]; + // Regex match + if (regex_match_opt) { + grapher::json_t const &val = value[ptr]; - // Fallback to non regex if the value isn't a string - if (!val.is_string()) { - return val == matcher_item_kv.value(); - } + // Fallback to non regex if the value isn't a string + if (!val.is_string()) { + return val == matcher_item_kv.value(); + } - return std::regex_match( - val.get_ref(), - std::regex(matcher_item_kv.value())); - } + return std::regex_match( + val.get_ref(), + std::regex(matcher_item_kv.value())); + } - // Regular match - return value[ptr] 
== matcher_item_kv.value(); - }); + // Regular match + return value[ptr] == matcher_item_kv.value(); + }); }; } diff --git a/grapher/lib/grapher/utils/json.cpp b/grapher/lib/grapher/utils/json.cpp index d8853ba..a165361 100644 --- a/grapher/lib/grapher/utils/json.cpp +++ b/grapher/lib/grapher/utils/json.cpp @@ -59,16 +59,18 @@ grapher::json_t::array_t extract_group(group_descriptor_t const &descriptor, std::ranges::copy_if( events, std::back_inserter(res), [&](grapher::json_t const &event) { return std::ranges::all_of( - predicates, [&](predicate_t const &p) { return p(event); }); + predicates, + [&](predicate_t const &predicate) { return predicate(event); }); }); return res; } -group_descriptor_t read_descriptor(grapher::json_t const &j) { - return {.name = get_as_ref(j, "name"), - .predicates = - get_as_ref(j, "predicates")}; +group_descriptor_t read_descriptor(grapher::json_t const &descriptor_json) { + return {.name = get_as_ref(descriptor_json, + "name"), + .predicates = get_as_ref( + descriptor_json, "predicates")}; } grapher::json_t group_descriptor_json(group_descriptor_t const &descriptor) { @@ -105,14 +107,15 @@ std::vector get_values(benchmark_instance_t const &instance, auto get_val = [&](std::filesystem::path const &repetition_path) -> double { // Extract events - grapher::json_t j; + grapher::json_t repetition_data; { std::ifstream repetition_ifstream(repetition_path); - repetition_ifstream >> j; + repetition_ifstream >> repetition_data; } grapher::json_t::array_t const &events = - get_as_ref(j, "traceEvents"); + get_as_ref(repetition_data, + "traceEvents"); // Accumulate double val = 0.; From d87aa97d1eb7ceeab500116f5904206540ba8591 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Wed, 28 Dec 2022 14:57:10 +0100 Subject: [PATCH 02/43] Other readability improvements: explicit variable names and explicit nullptr check --- ttw/ttw.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/ttw/ttw.cpp b/ttw/ttw.cpp 
index ff171ac..f5995a0 100644 --- a/ttw/ttw.cpp +++ b/ttw/ttw.cpp @@ -32,7 +32,9 @@ inline int get_timetrace_file(std::filesystem::path const time_trace_file_dest, namespace fs = std::filesystem; // Run program and measure CPU time - rusage children_rusage_begin, children_rusage_end; + rusage children_rusage_begin; + rusage children_rusage_end; + getrusage(RUSAGE_CHILDREN, &children_rusage_begin); // TODO: Bypass shell call and get return value int const ret = std::system(compile_command.c_str()); @@ -108,22 +110,23 @@ int main(int argc, char const *argv[]) { for (auto beg = &argv[args_start_id], end = &argv[argc]; beg < end; beg++) { // Current argument as a string_view - std::string_view current_arg{*beg}; + std::string_view current_argument{*beg}; // Handling -o flag - if (current_arg == std::string_view("-o") && beg + 1 != end) { + if (current_argument == std::string_view("-o") && beg + 1 != end) { obj_path = *(beg + 1); } // Handling Clang -ftime-trace flag - else if (current_arg == "-ftime-trace" || current_arg == "--ftime-trace") { + else if (current_argument == "-ftime-trace" || + current_argument == "--ftime-trace") { has_time_trace_flag = true; } // Handling --override-compiler flag - else if (current_arg.starts_with(override_flag_prefix)) { - current_arg.remove_prefix(override_flag_prefix.size()); - compiler_executable = current_arg; + else if (current_argument.starts_with(override_flag_prefix)) { + current_argument.remove_prefix(override_flag_prefix.size()); + compiler_executable = current_argument; // Do not pass argument to the compiler continue; @@ -135,7 +138,7 @@ int main(int argc, char const *argv[]) { std::string compile_command = std::move(compiler_executable) + args_builder.str(); - if (std::getenv("CTBENCH_TTW_VERBOSE")) { + if (std::getenv("CTBENCH_TTW_VERBOSE") != nullptr) { std::cout << "[CTBENCH_TTW] Compile command: " << compile_command << '\n'; } From f49cab948f8394f5e38d5be081ee29c4dd1f8122 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot 
Date: Wed, 28 Dec 2022 14:58:21 +0100 Subject: [PATCH 03/43] Added clang-tidy checks to clangd config, they should be migrated to .clang-tidy in the future for CI/CD checks --- .clangd | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.clangd b/.clangd index 0a48ed4..935cc6a 100644 --- a/.clangd +++ b/.clangd @@ -2,3 +2,9 @@ CompileFlags: CompilationDatabase: build/ Diagnostics: UnusedIncludes: Strict + ClangTidy: + Add: + - 'bugprone-*' + - 'readability-*' + - 'clang-analyzer-core.*' + - 'clang-analyzer-security.*' From 35cee466d0c4c1286e5c9f66f14167ff2af86ee9 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Wed, 28 Dec 2022 15:00:09 +0100 Subject: [PATCH 04/43] Removed clang-tidy management from CMake code as checks are now enforced by clangd --- CMakeLists.txt | 4 ---- CMakePresets.json | 3 +-- cmake/options.cmake | 4 ---- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 83cc713..0691a8d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,10 +3,6 @@ project(ctbench VERSION 1.1.1) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -if(${CTBENCH_ENABLE_CLANG_TIDY}) - set(CMAKE_CXX_CLANG_TIDY clang-tidy -checks=-*,readability-*) -endif() - include(cmake/dependencies.cmake) include(cmake/ctbench-compile-opts.cmake) diff --git a/CMakePresets.json b/CMakePresets.json index 1b6d0e2..734a23a 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -16,8 +16,7 @@ "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", "CMAKE_BUILD_TYPE": "Debug", "CMAKE_CXX_COMPILER": "clang++", - "CMAKE_C_COMPILER": "clang", - "CMAKE_CXX_CLANG_TIDY": "clang-tidy;-checks=-*,readability-*" + "CMAKE_C_COMPILER": "clang" } }, { diff --git a/cmake/options.cmake b/cmake/options.cmake index edb598e..f08f911 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -11,7 +11,3 @@ set(CTBENCH_ENABLE_TESTS set(CTBENCH_ENABLE_TRACY OFF CACHE BOOL "ctbench option: Enable Tracy profiler") - -set(CTBENCH_ENABLE_CLANG_TIDY - OFF - CACHE BOOL "ctbench option: 
Enable clang tidy") From a5abf6e79d2d261c2c160f561b0bfe19c43c18c9 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Wed, 28 Dec 2022 15:56:09 +0100 Subject: [PATCH 05/43] Added type alias for default scalar type in grapher --- grapher/include/grapher/core.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/grapher/include/grapher/core.hpp b/grapher/include/grapher/core.hpp index fa15477..f628d55 100644 --- a/grapher/include/grapher/core.hpp +++ b/grapher/include/grapher/core.hpp @@ -26,6 +26,9 @@ using multimap_t = /// Alias type for JSON objects. using json_t = nlohmann::basic_json; +/// Default type to represent scalar values from benchmark data. +using value_t = unsigned long; + // `time cmake --build --preset bench` results using different containers // (poacher/brainfuck project, pre-built benchmark targets): // - boost::container::flat_map -> 78.05 secs From a84a91065f8b855aeb720e6c48a389852cecbbc8 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Wed, 28 Dec 2022 15:57:40 +0100 Subject: [PATCH 06/43] More refactoring: function splitting in compare_by plotter code, more explicit variable naming, removed instances of double being used to represent unsigned long values from time-trace files --- grapher/include/grapher/utils/json.hpp | 7 +- grapher/lib/grapher/plotters/compare.cpp | 12 +-- grapher/lib/grapher/plotters/compare_by.cpp | 104 ++++++++++++-------- grapher/lib/grapher/plotters/stack.cpp | 16 +-- grapher/lib/grapher/utils/json.cpp | 20 ++-- 5 files changed, 93 insertions(+), 66 deletions(-) diff --git a/grapher/include/grapher/utils/json.hpp b/grapher/include/grapher/utils/json.hpp index 9bd5b8e..4808fa8 100644 --- a/grapher/include/grapher/utils/json.hpp +++ b/grapher/include/grapher/utils/json.hpp @@ -19,9 +19,10 @@ namespace grapher { /// For each instance in entry, returns the sum of the values pointed by /// value_jptr in the events matching the descriptor's predicates. 
-std::vector get_values(benchmark_instance_t const &instance, - std::vector const &predicates, - grapher::json_t::json_pointer value_jptr); +std::vector +get_values(benchmark_instance_t const &instance, + std::vector const &predicates, + grapher::json_t::json_pointer value_jptr); /// Generic conversion of a JSON field location to a std::string template diff --git a/grapher/lib/grapher/plotters/compare.cpp b/grapher/lib/grapher/plotters/compare.cpp index 55cf2e1..7f346e2 100644 --- a/grapher/lib/grapher/plotters/compare.cpp +++ b/grapher/lib/grapher/plotters/compare.cpp @@ -59,11 +59,11 @@ void plotter_compare_t::plot(benchmark_set_t const &bset, std::vector predicates = get_predicates(descriptor); for (benchmark_case_t const &bench : bset) { - std::vector x_points; - std::vector y_points; + std::vector x_points; + std::vector y_points; - std::vector x_average; - std::vector y_average; + std::vector x_average; + std::vector y_average; for (benchmark_instance_t const &instance : bench.instances) { check(!instance.repetitions.empty(), @@ -71,7 +71,7 @@ void plotter_compare_t::plot(benchmark_set_t const &bset, bench.name, instance.size), error_level_t::warning_v); - std::vector const values = + std::vector const values = get_values(instance, predicates, value_json_pointer); check(!values.empty(), @@ -82,7 +82,7 @@ void plotter_compare_t::plot(benchmark_set_t const &bset, // Drawing points if (draw_points) { - for (double value : values) { + for (grapher::value_t value : values) { x_points.push_back(instance.size); y_points.push_back(value); } diff --git a/grapher/lib/grapher/plotters/compare_by.cpp b/grapher/lib/grapher/plotters/compare_by.cpp index 2629bfd..c9c234d 100644 --- a/grapher/lib/grapher/plotters/compare_by.cpp +++ b/grapher/lib/grapher/plotters/compare_by.cpp @@ -26,26 +26,62 @@ namespace grapher::plotters { // Plot-friendly data structures +/// Value key type. 
Contains multiple values to group by a tuple of parameters +using key_t = boost::container::small_vector; + /// Point aggregate (multiple Y coordinates) -using point_data_t = std::vector; +using point_data_t = std::vector; /// Curve: X -> vec -using benchmark_curve_t = map_t; +using benchmark_curve_t = grapher::map_t; /// Benchmark name -> Curve -using curve_aggregate_t = map_t; - -/// Value key type. Contains multiple values to group by a tuple of parameters -using key_t = boost::container::small_vector; +using curve_aggregate_t = grapher::map_t; /// Feature -> Benchmark aggregate -using curve_aggregate_map_t = map_t; +using curve_aggregate_map_t = grapher::map_t; + +struct process_event_parameters_t { + std::vector const &key_pointers; + json_t::json_pointer const &value_pointer; + benchmark_case_t const &bench_case; + benchmark_instance_t const &instance; +}; -/// Wrangles data into a structure that's easier to work with for plotting. +/// Generate a curve for a given time-trace event and stores it in output_map. 
+inline void process_event(curve_aggregate_map_t &output_map, + grapher::json_t const &event, + process_event_parameters_t const ¶meters) { + // Building key from JSON pointers + key_t key; + for (json_t::json_pointer const &key_ptr : parameters.key_pointers) { + if (event.contains(key_ptr) && event[key_ptr].is_string()) { + key.push_back(event[key_ptr]); + } + } + + // Key/value presence and type checks + if (check(event.contains(parameters.value_pointer), + fmt::format("No value at {}: {}", + parameters.value_pointer.to_string(), event.dump()), + info_v) && + check(event[parameters.value_pointer].is_number(), + fmt::format("Value at {} is not an integer: {}", + parameters.value_pointer.to_string(), event.dump()), + info_v)) { + // Adding value + output_map[key][parameters.bench_case.name][parameters.instance.size] + .push_back(event[parameters.value_pointer]); + } +} + +/// Scans event data at value_pointer and generates curves for each key +/// generated from key_pointers. The curves are stored in a nested map +/// structure. 
curve_aggregate_map_t get_bench_curves(benchmark_set_t const &bset, - std::vector const &key_ptrs, - json_t::json_pointer const &val_ptr) { + std::vector const &key_pointers, + json_t::json_pointer const &value_pointer) { ZoneScoped; namespace fs = std::filesystem; @@ -62,27 +98,11 @@ get_bench_curves(benchmark_set_t const &bset, for (grapher::json_t const &event : get_as_ref( repetition_json, "traceEvents")) { - - // Building key from JSON pointers - key_t key; - for (json_t::json_pointer const &key_ptr : key_ptrs) { - if (event.contains(key_ptr) && event[key_ptr].is_string()) { - key.push_back(event[key_ptr]); - } - } - - // Key/value presence and type checks - if (check(event.contains(val_ptr), - fmt::format("No value at {}: {}", val_ptr.to_string(), - event.dump()), - info_v) && - check(event[val_ptr].is_number(), - fmt::format("Value at {} is not an integer: {}", - val_ptr.to_string(), event.dump()), - info_v)) { - // Adding value - res[key][bench_case.name][instance.size].push_back(event[val_ptr]); - } + process_event(res, event, + {.key_pointers = key_pointers, + .value_pointer = value_pointer, + .bench_case = bench_case, + .instance = instance}); } } } @@ -128,7 +148,7 @@ grapher::json_t plotter_compare_by_t::get_default_config() const { /// Parameter list for the generate_plot function /// extracted into a struct for readability -struct generate_plot_params_t { +struct generate_plot_parameters_t { std::filesystem::path const &plot_output_folder; grapher::json_t const &plotter_config; bool draw_average; @@ -140,8 +160,9 @@ struct generate_plot_params_t { /// NB: This function must remain free of config reading logic. 
inline void generate_plot( curve_aggregate_map_t::const_iterator::value_type aggregate_key_value, - generate_plot_params_t const ¶meters) { - ZoneScoped; + generate_plot_parameters_t const ¶meters) { + ZoneScoped; // Used for profiling with Tracy + auto const &[key, curve_aggregate] = aggregate_key_value; // Plot init @@ -149,19 +170,20 @@ inline void generate_plot( for (auto const &[bench_name, benchmark_curve] : curve_aggregate) { // Average curve coord vectors - std::vector x_curve; - std::vector y_curve; + std::vector x_curve; + std::vector y_curve; // Point coord vectors - std::vector x_points; - std::vector y_points; + std::vector x_points; + std::vector y_points; // Build point & curve vectors for (auto const &[x_value, y_values] : benchmark_curve) { // Building average curve vector if (parameters.draw_average && !y_values.empty()) { - double const sum = std::reduce(y_values.begin(), y_values.end()); - double const average_point_y = sum / y_values.size(); + grapher::value_t const sum = + std::reduce(y_values.begin(), y_values.end()); + grapher::value_t const average_point_y = sum / y_values.size(); x_curve.push_back(x_value); y_curve.push_back(average_point_y); @@ -169,7 +191,7 @@ inline void generate_plot( // Building point vector if (parameters.draw_points) { - for (double y_value : y_values) { + for (grapher::value_t y_value : y_values) { x_points.push_back(x_value); y_points.push_back(y_value); } diff --git a/grapher/lib/grapher/plotters/stack.cpp b/grapher/lib/grapher/plotters/stack.cpp index 49b0231..fc5f5a1 100644 --- a/grapher/lib/grapher/plotters/stack.cpp +++ b/grapher/lib/grapher/plotters/stack.cpp @@ -44,7 +44,7 @@ void plotter_stack_t::plot(benchmark_set_t const &bset, std::vector plots; // Storing max y value for normalization - double max_y_val = 0.; + grapher::value_t max_y_val = 0.; /// Draws a stacked curve graph for a given benchmark auto draw_plot = [&](benchmark_case_t const &bench) -> sciplot::Plot2D { @@ -52,17 +52,17 @@ void 
plotter_stack_t::plot(benchmark_set_t const &bset, apply_config(plot, config); // x axis - std::vector x_axis; + std::vector x_axis; std::transform(bench.instances.begin(), bench.instances.end(), std::back_inserter(x_axis), - [](benchmark_instance_t const &element) -> double { + [](benchmark_instance_t const &element) -> grapher::value_t { return element.size; }); // Low y axis - std::vector y_low(x_axis.size(), 0.); + std::vector y_low(x_axis.size(), 0.); // High y axis - std::vector y_high(x_axis.size()); + std::vector y_high(x_axis.size()); for (group_descriptor_t const &descriptor : descriptors) { // Storing previous value as we iterate @@ -72,7 +72,7 @@ void plotter_stack_t::plot(benchmark_set_t const &bset, for (std::size_t i = 0; i < bench.instances.size(); i++) { benchmark_instance_t const &instance = bench.instances[i]; - std::vector const values = + std::vector const values = get_values(instance, predicates, feature_value_jptr); check(values.empty(), @@ -81,7 +81,7 @@ void plotter_stack_t::plot(benchmark_set_t const &bset, descriptor.name, bench.name, instance.size)); // TODO: Get better stats (standard deviation, etc...) 
- double const y_val = + grapher::value_t const y_val = y_low[i] + std::reduce(values.begin(), values.end()) / values.size(); @@ -108,7 +108,7 @@ void plotter_stack_t::plot(benchmark_set_t const &bset, // Normalize & save std::filesystem::create_directories(dest); for (std::size_t i = 0; i < bset.size(); i++) { - plots[i].yrange(0., max_y_val); + plots[i].yrange(0., double(max_y_val)); save_plot(plots[i], dest / bset[i].name, config); } } diff --git a/grapher/lib/grapher/utils/json.cpp b/grapher/lib/grapher/utils/json.cpp index a165361..cf0f61e 100644 --- a/grapher/lib/grapher/utils/json.cpp +++ b/grapher/lib/grapher/utils/json.cpp @@ -100,12 +100,14 @@ read_descriptors(grapher::json_t::array_t const &list) { return res; } -std::vector get_values(benchmark_instance_t const &instance, - std::vector const &predicates, - grapher::json_t::json_pointer value_jptr) { - std::vector res(instance.repetitions.size()); - - auto get_val = [&](std::filesystem::path const &repetition_path) -> double { +std::vector +get_values(benchmark_instance_t const &instance, + std::vector const &predicates, + grapher::json_t::json_pointer value_jptr) { + std::vector res(instance.repetitions.size()); + + auto get_val = + [&](std::filesystem::path const &repetition_path) -> grapher::value_t { // Extract events grapher::json_t repetition_data; { @@ -118,10 +120,12 @@ std::vector get_values(benchmark_instance_t const &instance, "traceEvents"); // Accumulate - double val = 0.; + grapher::value_t val = 0; for (grapher::json_t const &event : events) { if (std::all_of(predicates.begin(), predicates.end(), - [&](predicate_t const &p) -> bool { return p(event); })) { + [&](predicate_t const &predicate) -> bool { + return predicate(event); + })) { val += get_as_ref(event, value_jptr); } } From a5e18b3cc5bfb4a9e562fabb305d2a6dfee8a104 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Mon, 9 Jan 2023 13:44:28 +0100 Subject: [PATCH 07/43] Added new graphs and updated old ones --- 
docs/images/Backend.svg | 233 +++++++ docs/images/ExecuteCompiler.svg | 638 +++--------------- docs/images/Frontend.svg | 262 +++++++ .../run_programprogram_string.svg | 237 +++++++ docs/images/Total_Frontend.svg | 262 +++++++ docs/images/Total_InstantiateFunction.svg | 171 +++-- 6 files changed, 1183 insertions(+), 620 deletions(-) create mode 100644 docs/images/Backend.svg create mode 100644 docs/images/Frontend.svg create mode 100644 docs/images/InstantiateFunction/run_programprogram_string.svg create mode 100644 docs/images/Total_Frontend.svg diff --git a/docs/images/Backend.svg b/docs/images/Backend.svg new file mode 100644 index 0000000..cb62bb3 --- /dev/null +++ b/docs/images/Backend.svg @@ -0,0 +1,233 @@ + + + +Gnuplot +Produced by GNUPLOT 5.4 patchlevel 5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Time (µs) + + + + + Benchmark size factor + + + + + bfbench-consecutiveloops-et-clang-tt average + + + + bfbench-consecutiveloops-flat-clang-tt average + + + + + + + Timings + + + + + bfbench-consecutiveloops-et-clang-tt average + + + bfbench-consecutiveloops-et-clang-tt average + + + + + + bfbench-consecutiveloops-flat-clang-tt average + + + bfbench-consecutiveloops-flat-clang-tt average + + + + + + + + 110000 + + + + + 120000 + + + + + 130000 + + + + + 140000 + + + + + 150000 + + + + + 160000 + + + + + 170000 + + + + + 180000 + + + + + 1 + + + + + 2 + + + + + 3 + + + + + 4 + + + + + 5 + + + + + 6 + + + + + 7 + + + + + 8 + + + + + 9 + + + + + 10 + + + + + + + + + + + + + diff --git a/docs/images/ExecuteCompiler.svg b/docs/images/ExecuteCompiler.svg index d9e36b9..3551f48 100644 --- a/docs/images/ExecuteCompiler.svg +++ b/docs/images/ExecuteCompiler.svg @@ -1,17 +1,17 @@ Gnuplot -Produced by GNUPLOT 5.4 patchlevel 3 +Produced by GNUPLOT 5.4 patchlevel 5 - + @@ -44,632 +44,210 @@ - - - - - + + + + + Time (µs) - + Benchmark size factor - bfbench-consecutiveloops-et - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bfbench-consecutiveloops-et - - - - bfbench-consecutiveloops-flat - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bfbench-consecutiveloops-flat - - - - bfbench-imbricatedloops-et - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bfbench-imbricatedloops-et + bfbench-consecutiveloops-et-clang-tt average - + - bfbench-imbricatedloops-flat - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bfbench-imbricatedloops-flat + bfbench-consecutiveloops-flat-clang-tt average - + - - + + Timings - bfbench-consecutiveloops-et - - - bfbench-consecutiveloops-et - - - - - - - bfbench-consecutiveloops-et - - - bfbench-consecutiveloops-et - - - - - - bfbench-consecutiveloops-flat - - - bfbench-consecutiveloops-flat - - - - - - - bfbench-consecutiveloops-flat - - - bfbench-consecutiveloops-flat - - - - - - bfbench-imbricatedloops-et + bfbench-consecutiveloops-et-clang-tt average - - bfbench-imbricatedloops-et + + bfbench-consecutiveloops-et-clang-tt average - - + - bfbench-imbricatedloops-et + bfbench-consecutiveloops-flat-clang-tt average - - bfbench-imbricatedloops-et + + bfbench-consecutiveloops-flat-clang-tt average - + - bfbench-imbricatedloops-flat - - bfbench-imbricatedloops-flat + + 1.2×106 - - - - bfbench-imbricatedloops-flat - - - bfbench-imbricatedloops-flat + + 1.3×106 - - - - - 1×106 + + 1.4×106 - - 2×106 + + 1.5×106 - - 3×106 + + 1.6×106 - - 4×106 + + 1.7×106 - - 5×106 + + 1.8×106 - - 6×106 + + 1.9×106 - - 7×106 - - - - - 8×106 - - - - - 9×106 + + 2×106 - 
- 1×107 + + 2.1×106 - - 1.1×107 + 2.2×106 - + 1 - + 2 - + 3 - + 4 - + 5 - + 6 - + 7 - + 8 + + + 9 + + + + + 10 + + - + diff --git a/docs/images/Frontend.svg b/docs/images/Frontend.svg new file mode 100644 index 0000000..c4d3482 --- /dev/null +++ b/docs/images/Frontend.svg @@ -0,0 +1,262 @@ + + + +Gnuplot +Produced by GNUPLOT 5.4 patchlevel 5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Time (µs) + + + + + Benchmark size factor + + + + + bfbench-consecutiveloops-et-clang-tt average + + + + bfbench-consecutiveloops-flat-clang-tt average + + + + + + + Timings + + + + + bfbench-consecutiveloops-et-clang-tt average + + + bfbench-consecutiveloops-et-clang-tt average + + + + + + bfbench-consecutiveloops-flat-clang-tt average + + + bfbench-consecutiveloops-flat-clang-tt average + + + + + + + + 500000 + + + + + 550000 + + + + + 600000 + + + + + 650000 + + + + + 700000 + + + + + 750000 + + + + + 800000 + + + + + 850000 + + + + + 900000 + + + + + 950000 + + + + + 1×106 + + + + + 1.05×106 + + + + + 1 + + + + + 2 + + + + + 3 + + + + + 4 + + + + + 5 + + + + + 6 + + + + + 7 + + + + + 8 + + + + + 9 + + + + + 10 + + + + + + + + + + + + + diff --git a/docs/images/InstantiateFunction/run_programprogram_string.svg b/docs/images/InstantiateFunction/run_programprogram_string.svg new file mode 100644 index 0000000..169a08e --- /dev/null +++ b/docs/images/InstantiateFunction/run_programprogram_string.svg @@ -0,0 +1,237 @@ + + + +Gnuplot +Produced by GNUPLOT 5.4 patchlevel 5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Time (µs) + + + + + Benchmark size factor + + + + + bfbench-consecutiveloops-et-clang-tt average + + + + bfbench-consecutiveloops-flat-clang-tt average + + + + + + + Timings + + + + + bfbench-consecutiveloops-et-clang-tt average + + + bfbench-consecutiveloops-et-clang-tt average + + + + + + bfbench-consecutiveloops-flat-clang-tt average + + + bfbench-consecutiveloops-flat-clang-tt average + + + + + 
+ + + 0 + + + + + 20000 + + + + + 40000 + + + + + 60000 + + + + + 80000 + + + + + 100000 + + + + + 120000 + + + + + 140000 + + + + + 160000 + + + + + 180000 + + + + + 200000 + + + + + 1 + + + + + 2 + + + + + 3 + + + + + 4 + + + + + 5 + + + + + 6 + + + + + 7 + + + + + 8 + + + + + 9 + + + + + 10 + + + + + + + + + + + + + diff --git a/docs/images/Total_Frontend.svg b/docs/images/Total_Frontend.svg new file mode 100644 index 0000000..26742ae --- /dev/null +++ b/docs/images/Total_Frontend.svg @@ -0,0 +1,262 @@ + + + +Gnuplot +Produced by GNUPLOT 5.4 patchlevel 5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Time (µs) + + + + + Benchmark size factor + + + + + bfbench-consecutiveloops-et-clang-tt average + + + + bfbench-consecutiveloops-flat-clang-tt average + + + + + + + Timings + + + + + bfbench-consecutiveloops-et-clang-tt average + + + bfbench-consecutiveloops-et-clang-tt average + + + + + + bfbench-consecutiveloops-flat-clang-tt average + + + bfbench-consecutiveloops-flat-clang-tt average + + + + + + + + 1×106 + + + + + 1.1×106 + + + + + 1.2×106 + + + + + 1.3×106 + + + + + 1.4×106 + + + + + 1.5×106 + + + + + 1.6×106 + + + + + 1.7×106 + + + + + 1.8×106 + + + + + 1.9×106 + + + + + 2×106 + + + + + 2.1×106 + + + + + 1 + + + + + 2 + + + + + 3 + + + + + 4 + + + + + 5 + + + + + 6 + + + + + 7 + + + + + 8 + + + + + 9 + + + + + 10 + + + + + + + + + + + + + diff --git a/docs/images/Total_InstantiateFunction.svg b/docs/images/Total_InstantiateFunction.svg index 3c9097a..b4bcd1b 100644 --- a/docs/images/Total_InstantiateFunction.svg +++ b/docs/images/Total_InstantiateFunction.svg @@ -44,134 +44,125 @@ - - - + + + Time (µs) - + Benchmark size factor - bfbench-consecutiveloops-et average + bfbench-consecutiveloops-et-clang-tt average - + - bfbench-consecutiveloops-flat average + bfbench-consecutiveloops-flat-clang-tt average - - - bfbench-imbricatedloops-et average - - - - bfbench-imbricatedloops-flat average - - + - - + + Timings - 
bfbench-consecutiveloops-et average + bfbench-consecutiveloops-et-clang-tt average - - bfbench-consecutiveloops-et average + + bfbench-consecutiveloops-et-clang-tt average - + - bfbench-consecutiveloops-flat average + bfbench-consecutiveloops-flat-clang-tt average - - bfbench-consecutiveloops-flat average + + bfbench-consecutiveloops-flat-clang-tt average - + - bfbench-imbricatedloops-et average - - bfbench-imbricatedloops-et average + + 100000 - + + 120000 - bfbench-imbricatedloops-flat average + - - bfbench-imbricatedloops-flat average + + 140000 - + + 160000 + - - 100000 + + 180000 - - 150000 + + 200000 - - 200000 + + 220000 - - 250000 + + 240000 - - 300000 + + 260000 - - 350000 + + 280000 + + + + + 300000 @@ -180,56 +171,56 @@ - + 2 - + 3 - + 4 - + 5 - + 6 - + 7 - + 8 - + 9 - + 10 @@ -238,7 +229,7 @@ - + From 145458f29d042119170753e60d9f90deeb5d1b7f Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Mon, 9 Jan 2023 13:45:01 +0100 Subject: [PATCH 08/43] Added figures in practical examples --- paper.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/paper.md b/paper.md index a6774aa..bd64cae 100644 --- a/paper.md +++ b/paper.md @@ -93,6 +93,13 @@ experience on code generation using constexpr allocated memory, studying and overcoming the roadblocks, and evaluating the compile-time impact of the involved techniques. +![Total_InstantiateFunction](docs/images/Total_InstantiateFunction.svg) +![Total_Frontend](docs/images/Total_Frontend.svg) +![run_programprogram_string](docs/images/InstantiateFunction/run_programprogram_string.svg) +![Frontend](docs/images/Frontend.svg) +![ExecuteCompiler](docs/images/ExecuteCompiler.svg) +![Backend](docs/images/Backend.svg) + + +This paper introduces ctbench, which is a set of tools for compile-time +benchmarking and analysis in C++. It aims to provide developer-friendly tools to +declare and run benchmarks, then aggregate, filter out, and plot the data to +analyze it. 
+ + + +We'll first have a look at current tools for compile-time profiling and +benchmarking and establish the limits of what these tools can do. + +# State of art That increase in compute needs raises the question on how to measure the impact of metaprogramming techniques on compile times. There are a lot of tools to run @@ -43,7 +55,7 @@ various compilation steps, which can be enabled by passing the `-ftime-trace` flag. Its output contains data that can be directly linked to symbols in the source code, making it easier to study the impact of specific symbols on various stages of compilation. The output format is a JSON file meant to be compatible -with Chrome's flame graph visualizer, that contains a series of timed events +with Chrome's flame graph visualizer, that contains a series of time events with optional metadata like the (mangled) C++ symbol or the file related to an event. @@ -84,13 +96,6 @@ benchmarking, making comprehensive benchmark quick and easy, and the only compile-time benchmarking tool that can gater Clang profiling data for scaling analysis. -# Statement of interest - -ctbench was first presented at the CPPP 2021 conference[@ctbench-cppp21] which -is the main C++ technical conference in France. It is being used to benchmark -examples from the poacher[@poacher] project, which was briefly presented at the -Meeting C++ 2022[@meetingcpp22] technical conference. - # Practical examples Poacher is a series of experimental projects meant to help us understanding what @@ -130,6 +135,13 @@ the hierarchy of Clang's timer events using flame graph visualizers as events might overlap each other. Also note that the hierarchy of events can vary from a benchmark case to another within a same benchmark category. +# Statement of interest + +ctbench was first presented at the CPPP 2021 conference[@ctbench-cppp21] which +is the main C++ technical conference in France. 
It is being used to benchmark +examples from the poacher[@poacher] project, which was briefly presented at the +Meeting C++ 2022[@meetingcpp22] technical conference. + +reflection[@static-reflection]. + +However there is no clear cut methodology to compare metaprogramming strategies This paper introduces ctbench, which is a set of tools for compile-time benchmarking and analysis in C++. It aims to provide developer-friendly tools to declare and run benchmarks, then aggregate, filter out, and plot the data to analyze it. +As such, ctbench is meant to become the first layer for proper scientific +methodology for analyzing compile-time program behavior. + We'll first have a look at current tools for compile-time profiling and benchmarking and establish the limits of what these tools can do. -# State of art +# Statement of need + +template mp c'est le staple de C++ pour avoir du haut niveau et des perfs + +tradeoff: temps de compil, car ce qu'on fait pas au runtime, on le fait en +partie la compil + +comme la mp est plus simple depuis C++11 et C++17, la mp devient plus courante +et les devs se retrouvent avec des temps de compil plus longs sans savoir +expliquer pourquoi + +la mesure des temps de compil devient importante, et il devient important d'en connaitre les raisons + +une premiere generation d'outils existe qui permet de faire du benchmarking en mesurant le temps de compil: -That increase in compute needs raises the question on how to measure the impact +- metabench permet ceci machin +- buildbench permet de faire du microbenchmarking sur des cas triviaux... + + +un outil existe et permet d'obtenir des donnees internes au compilo: + +- templight permet de faire du profiling single-case, avec des outils de debugging + +l'idee de templight a plus ou moins ete reprise dans clang avec time-trace, qui sort des fichiers dans un format visualisable via chrome etc... 
+ +The increase in compile-time compute needs raises the question on how to measure the impact of metaprogramming techniques on compile times. There are a lot of tools to run benchmarks for "runtime" programs, but as of today, only Metabench[@metabench] is capable of running compile-time benchmarks instantiated at several sizes to @@ -66,17 +94,25 @@ The events can then be visualized using tools such as Google's benchmark case with the expression template backend]( docs/images/perfetto-ui.png) +time-trace fournit des mesures tres fines et tres exhaustives. l'ideal serait +d'avoir un outil similaire a metabench qui permette facilement d'analyser les +donnees issues de time-trace, et premettant de faire de l'analyse sur des cas de +taille variable + # Statement of need Originally inspired by Metabench[@metabench], ctbench development was driven by the need for a similar tool that allows the observation of Clang's time-trace files to help get a more comprehensive view on the impact of -metaprogramming techniques on compile times. +metaprogramming techniques on compile times. A strong emphasis was put on +developer friendliness, project integration, and component reusability. + +ctbench provides: -A strong emphasis was put on developer friendliness, project integration, and -component reusability. ctbench provides a well documented CMake API for -benchmark declaration, allows benchmark generation using the C++ pre-processor, -and its C++ core can be used as a shared C++ library as well. +- a well documented CMake API for benchmark declaration, which can be generated + using the C++ pre-processor, +- a set of JSON-configurable plotters with customizable data aggretation + features, which can be reused as a C++ library The core library provides data representations to handle benchmarks cases instantited at several sizes, each instance being repeated at least once. 
It From 52c60dfc1dcfb40c9c4bc9316a3b3de4d55ab9bb Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Wed, 11 Jan 2023 11:24:24 +0100 Subject: [PATCH 22/43] Fixed subtitle --- paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper.md b/paper.md index 91c81eb..5ba53cc 100644 --- a/paper.md +++ b/paper.md @@ -99,7 +99,7 @@ d'avoir un outil similaire a metabench qui permette facilement d'analyser les donnees issues de time-trace, et premettant de faire de l'analyse sur des cas de taille variable -# Statement of need +# Functionality Originally inspired by Metabench[@metabench], ctbench development was driven by the need for a similar tool that allows the observation of Clang's From 1c365d77b8b933842c8f94e37bd36d227e11841f Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Wed, 11 Jan 2023 11:28:13 +0100 Subject: [PATCH 23/43] Reworked functionality text structure --- paper.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paper.md b/paper.md index 5ba53cc..91793d1 100644 --- a/paper.md +++ b/paper.md @@ -111,6 +111,10 @@ ctbench provides: - a well documented CMake API for benchmark declaration, which can be generated using the C++ pre-processor, + +Although CMake is not a proper programming language, it is used as the main API +for ctbench as most C++ developers are already familiar with it. 
+ - a set of JSON-configurable plotters with customizable data aggretation features, which can be reused as a C++ library From 61b7a69f055357a7a338a1daedabaef493d20a0a Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Wed, 11 Jan 2023 11:29:59 +0100 Subject: [PATCH 24/43] Indentation for bullet list details --- paper.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/paper.md b/paper.md index 91793d1..1fb3f7f 100644 --- a/paper.md +++ b/paper.md @@ -112,20 +112,20 @@ ctbench provides: - a well documented CMake API for benchmark declaration, which can be generated using the C++ pre-processor, -Although CMake is not a proper programming language, it is used as the main API -for ctbench as most C++ developers are already familiar with it. + Although CMake is not a proper programming language, it is used as the main API + for ctbench as most C++ developers are already familiar with it. - a set of JSON-configurable plotters with customizable data aggretation features, which can be reused as a C++ library -The core library provides data representations to handle benchmarks cases -instantited at several sizes, each instance being repeated at least once. It -also provides tools to aggregate, filter, and sort data from time-trace events, -as well as various plotters that provide different aggregation and vizualisation -strategies. The plotters can generate files in various format thanks to the -Sciplot[@sciplot] library, and they are highly configurable through JSON -configuration files that are well documented. Default configuration files can be -generated using a dedicated CLI tool. + The core library provides data representations to handle benchmarks cases + instantited at several sizes, each instance being repeated at least once. It + also provides tools to aggregate, filter, and sort data from time-trace + events, as well as various plotters that provide different aggregation and + vizualisation strategies. 
The plotters can generate files in various format + thanks to the Sciplot[@sciplot] library, and they are highly configurable + through JSON configuration files that are well documented. Default + configuration files can be generated using a dedicated CLI tool. Even though ctbench was made to analyze Clang's time-trace events, it can also measure compiler execution time and report it in a synthetic time-trace file, From 920648b0b59697543d7623293ba7c2d6c1ca4760 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Thu, 12 Jan 2023 15:39:19 +0100 Subject: [PATCH 25/43] Redaction following notetaking with Joel --- paper.md | 120 +++++++++++++++++++++++++++---------------------------- 1 file changed, 58 insertions(+), 62 deletions(-) diff --git a/paper.md b/paper.md index 1fb3f7f..a551b36 100644 --- a/paper.md +++ b/paper.md @@ -22,20 +22,19 @@ bibliography: paper.bib With metaprogrammed libraries like Eigen[@eigen], Blaze[@blazelib], or CTRE[@ctre] being developed, we're seeing increasing computing needs at compile -time. These compile-time computing needs might grow even further as C++ embeds -more features over time to support and extend this kind of practices, like -compile-time containers[@more-constexpr-containers] or static -reflection[@static-reflection]. - -However there is no clear cut methodology to compare metaprogramming strategies - -This paper introduces ctbench, which is a set of tools for compile-time +time. These needs might grow even larger as C++ embeds more features over time +to support and extend this kind of practices, like compile-time +containers[@more-constexpr-containers] or static reflection[@static-reflection]. +However there is still no clear cut methodology to compare the performance +impact of different metaprogramming strategies. Moreover, new C++ features might +allow for new techniques that could offer better compile-time performance. +However, these claims still remain to be proven with proper methodology. 
+ +This paper introduces **ctbench**, which is a set of tools for compile-time benchmarking and analysis in C++. It aims to provide developer-friendly tools to declare and run benchmarks, then aggregate, filter out, and plot the data to -analyze it. - -As such, ctbench is meant to become the first layer for proper scientific -methodology for analyzing compile-time program behavior. +analyze it. As such, **ctbench** is meant to become the first layer for proper +scientific methodology for analyzing compile-time program behavior. @@ -44,48 +43,34 @@ benchmarking and establish the limits of what these tools can do. # Statement of need -template mp c'est le staple de C++ pour avoir du haut niveau et des perfs - -tradeoff: temps de compil, car ce qu'on fait pas au runtime, on le fait en -partie la compil - -comme la mp est plus simple depuis C++11 et C++17, la mp devient plus courante -et les devs se retrouvent avec des temps de compil plus longs sans savoir -expliquer pourquoi - -la mesure des temps de compil devient importante, et il devient important d'en connaitre les raisons - -une premiere generation d'outils existe qui permet de faire du benchmarking en mesurant le temps de compil: - -- metabench permet ceci machin -- buildbench permet de faire du microbenchmarking sur des cas triviaux... - - -un outil existe et permet d'obtenir des donnees internes au compilo: - -- templight permet de faire du profiling single-case, avec des outils de debugging - -l'idee de templight a plus ou moins ete reprise dans clang avec time-trace, qui sort des fichiers dans un format visualisable via chrome etc... - -The increase in compile-time compute needs raises the question on how to measure the impact -of metaprogramming techniques on compile times. 
There are a lot of tools to run -benchmarks for "runtime" programs, but as of today, only Metabench[@metabench] -is capable of running compile-time benchmarks instantiated at several sizes to -measure compile-time scaling of metaprogramming techniques. Another tool called -Templight[@templight] has debugging and profiling capabilities for templates -using Clang, although it only works as a "one-shot" profiler, which can't be -used to study how metaprograms scale. Online compile-time benchmarking tool -Build-Bench[@buildbench] is available too, but only allows simple A/B -comparisons by measuring compiler execution time. - -Clang has a built-in profiler that provides in-depth time measurements of -various compilation steps, which can be enabled by passing the `-ftime-trace` -flag. Its output contains data that can be directly linked to symbols in the -source code, making it easier to study the impact of specific symbols on various -stages of compilation. The output format is a JSON file meant to be compatible -with Chrome's flame graph visualizer, that contains a series of time events -with optional metadata like the (mangled) C++ symbol or the file related to an -event. +C++ template metaprogramming raised interest for allowing computing libraries to +offer great performance with a very high level of abstraction. As a tradeoff for +interpreting representations of calculations at runtime, they are represented at +compile-time, and transformed directly into their own programs. + +As metaprogramming became easier with C++11 and C++17, it became more mainstream +and consequently, developers have to bear with longer compilation times without +being able to explain them. Therefore being able to measure compilation times is +increasingly important, and being able to explain them as well. 
A first +generation of tools aims to tackle this issue with their own specific +methodologies: + +- Buildbench[@buildbench] measures compiler execution times for basic + A-B compile-time comparisons in a web browser, +- Metabench[@metabench] instantiates variably sized benchmarks using embedded + Ruby (ERB) templating and plots compiler execution time, allowing scaling + analyses of metaprograms, +- Templight[@templight] adds Clang template instantiation inspection + capabilities with debugging and profiling tools. + +Additionally, Clang has a built-in profiler that provides in-depth time +measurements of various compilation steps, which can be enabled by passing the +`-ftime-trace` flag. Its output contains data that can be directly linked to +symbols in the source code, making it easier to study the impact of specific +symbols on various stages of compilation. The output format is a JSON file meant +to be compatible with Chrome's flame graph visualizer, that contains a series of +time events with optional metadata like the (mangled) C++ symbol or the file +related to an event. The events can then be visualized using tools such as Google's [Perfetto UI](https://ui.perfetto.dev/). @@ -101,19 +86,19 @@ taille variable # Functionality -Originally inspired by Metabench[@metabench], ctbench development was +Originally inspired by Metabench[@metabench], **ctbench** development was driven by the need for a similar tool that allows the observation of Clang's time-trace files to help get a more comprehensive view on the impact of metaprogramming techniques on compile times. A strong emphasis was put on developer friendliness, project integration, and component reusability. -ctbench provides: +**ctbench** provides: - a well documented CMake API for benchmark declaration, which can be generated using the C++ pre-processor, Although CMake is not a proper programming language, it is used as the main API - for ctbench as most C++ developers are already familiar with it. 
+ for **ctbench** as most C++ developers are already familiar with it. - a set of JSON-configurable plotters with customizable data aggretation features, which can be reused as a C++ library @@ -127,17 +112,28 @@ ctbench provides: through JSON configuration files that are well documented. Default configuration files can be generated using a dedicated CLI tool. -Even though ctbench was made to analyze Clang's time-trace events, it can also -measure compiler execution time and report it in a synthetic time-trace file, -making it partially compatible with GCC as well. +Even though **ctbench** was made to analyze Clang's time-trace events, it can +also measure compiler execution time and report it in a synthetic time-trace +file, making it partially compatible with GCC as well. -All these features make ctbench a very complete toolkit for compile-time +Meme si la premiere fonctionnalite de ctbench c'est de faire du time-trace, on +fournit aussi des outils permettant de faire de la comparaison entre differents +compilateurs (ou diffrentes versions d'un compilo). GCC ne supportant pas +time-trace, ctbench permet egalement de mesurer le temps d'execution du +compilateur et genere un fichier au format time-trace pour assurer +l'interoperabilite avec les outils d'aggregation de donnees etc... + +All these features make **ctbench** a very complete toolkit for compile-time benchmarking, making comprehensive benchmark quick and easy, and the only compile-time benchmarking tool that can gater Clang profiling data for scaling analysis. # Practical examples +rajouter un exemple simple avec variadiques recursifs vs parameter pack (avec le +code C++) et montrer l'analyse rapide, ensuite enoncer simplement poacher et +rule of cheese + Poacher is a series of experimental projects meant to help us understanding what metaprogramming could be thanks to new C++ features such as non-transient constexpr memory allocation[@constexpr-memory]. 
It helped us getting hands-on @@ -177,7 +173,7 @@ benchmark case to another within a same benchmark category. # Statement of interest -ctbench was first presented at the CPPP 2021 conference[@ctbench-cppp21] which +**ctbench** was first presented at the CPPP 2021 conference[@ctbench-cppp21] which is the main C++ technical conference in France. It is being used to benchmark examples from the poacher[@poacher] project, which was briefly presented at the Meeting C++ 2022[@meetingcpp22] technical conference. From 10a377ae6902bf1b444b48ed00419351f3d4605b Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Thu, 12 Jan 2023 15:47:47 +0100 Subject: [PATCH 26/43] Added date for metabench citation --- paper.bib | 1 + 1 file changed, 1 insertion(+) diff --git a/paper.bib b/paper.bib index 83b98fd..1480a4a 100644 --- a/paper.bib +++ b/paper.bib @@ -28,6 +28,7 @@ @article{more-constexpr-containers @misc{metabench, title = {Metabench: A simple framework for compile-time microbenchmarks}, author = {Dionne, Louis and Dutra, Bruno and Holmes, Odin and others}, + year = {2017}, url = {https://github.com/ldionne/metabench/}, } From 4be157f93edcd88517d60a3696f77ba81df7dce4 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Thu, 12 Jan 2023 15:48:15 +0100 Subject: [PATCH 27/43] Minor changes to the draft --- paper.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/paper.md b/paper.md index a551b36..715083b 100644 --- a/paper.md +++ b/paper.md @@ -79,10 +79,10 @@ The events can then be visualized using tools such as Google's benchmark case with the expression template backend]( docs/images/perfetto-ui.png) -time-trace fournit des mesures tres fines et tres exhaustives. 
l'ideal serait -d'avoir un outil similaire a metabench qui permette facilement d'analyser les -donnees issues de time-trace, et premettant de faire de l'analyse sur des cas de -taille variable + # Functionality @@ -97,8 +97,8 @@ developer friendliness, project integration, and component reusability. - a well documented CMake API for benchmark declaration, which can be generated using the C++ pre-processor, - Although CMake is not a proper programming language, it is used as the main API - for **ctbench** as most C++ developers are already familiar with it. + Although CMake is not a proper programming language, it is used as the main + API for **ctbench** as most C++ developers are already familiar with it. - a set of JSON-configurable plotters with customizable data aggretation features, which can be reused as a C++ library From b762554a994bd902f66e5ab952a7144003380e8a Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Mon, 16 Jan 2023 13:09:47 +0100 Subject: [PATCH 28/43] Added example benchmark --- example/CMakeLists.txt | 48 ++++++++++++++++++++++ example/CMakePresets.json | 65 ++++++++++++++++++++++++++++++ example/compare-all.json | 15 +++++++ example/variadic_sum/expansion.cpp | 16 ++++++++ example/variadic_sum/recursive.cpp | 19 +++++++++ 5 files changed, 163 insertions(+) create mode 100644 example/CMakeLists.txt create mode 100644 example/CMakePresets.json create mode 100644 example/compare-all.json create mode 100644 example/variadic_sum/expansion.cpp create mode 100644 example/variadic_sum/recursive.cpp diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt new file mode 100644 index 0000000..454140e --- /dev/null +++ b/example/CMakeLists.txt @@ -0,0 +1,48 @@ +# Simple standalone project for compile-time benchmarking using ctbench + +# Usage: + +# ```sh +# cmake --preset release +# cmake --build --preset release +# ``` + +# CMake presets use clang/clang++ by default with time-trace enabled. 
+ +cmake_minimum_required(VERSION 3.25) +project(example-project) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_CXX_STANDARD 20) + +find_package(ctbench REQUIRED) + +add_compile_options( + -Wall + -Wextra + -Werror + -Wnull-dereference + -Wold-style-cast + -Wdouble-promotion + -Wshadow) + +if(ENABLE_TIME_TRACE) + add_compile_options(-ftime-trace -ftime-trace-granularity=1 + -fconstexpr-steps=2147483647 -fbracket-depth=2147483647) +endif() + +set(BENCHMARK_START 1 CACHE STRING "Benchmark size minimum") +set(BENCHMARK_STOP 16 CACHE STRING "Benchmark size maximum") +set(BENCHMARK_STEP 1 CACHE STRING "Benchmark size step") +set(BENCHMARK_ITERATIONS 1 CACHE STRING "Number of samples per size") + +ctbench_add_benchmark( + variadic_sum.expansion variadic_sum/expansion.cpp ${BENCHMARK_START} + ${BENCHMARK_STOP} ${BENCHMARK_STEP} ${BENCHMARK_ITERATIONS}) + +ctbench_add_benchmark( + variadic_sum.recursive variadic_sum/recursive.cpp ${BENCHMARK_START} + ${BENCHMARK_STOP} ${BENCHMARK_STEP} ${BENCHMARK_ITERATIONS}) + +ctbench_add_graph(variadic_sum-compare-graph compare-all.json + variadic_sum.expansion variadic_sum.recursive) diff --git a/example/CMakePresets.json b/example/CMakePresets.json new file mode 100644 index 0000000..0632b03 --- /dev/null +++ b/example/CMakePresets.json @@ -0,0 +1,65 @@ +{ + "version" : 4, + "cmakeMinimumRequired" : { + "major" : 3, + "minor" : 25, + "patch" : 0 + }, + "configurePresets" : [ + { + "name" : "dev", + "displayName" : "Dev build", + "description" : "Provides a compile_commands.json file", + "generator" : "Ninja", + "binaryDir" : "${sourceDir}/build", + "cacheVariables" : { + "ENABLE_TIME_TRACE" : "ON", + "CMAKE_CXX_COMPILER" : "clang++", + "CMAKE_C_COMPILER" : "clang", + "CMAKE_EXPORT_COMPILE_COMMANDS" : "ON", + "CMAKE_BUILD_TYPE" : "RelWithDebInfo" + } + }, + { + "name" : "release", + "displayName" : "Release", + "description" : "Release", + "generator" : "Ninja", + "binaryDir" : "${sourceDir}/build/release", + "cacheVariables" : 
{ + "ENABLE_TIME_TRACE" : "ON", + "CMAKE_CXX_COMPILER" : "clang++", + "CMAKE_C_COMPILER" : "clang", + "CMAKE_EXPORT_COMPILE_COMMANDS" : "ON", + "CMAKE_BUILD_TYPE" : "RelWithDebInfo" + } + }, + { + "name" : "debug", + "inherits" : "release", + "displayName" : "Debug", + "description" : "Debug", + "binaryDir" : "${sourceDir}/build/debug", + "cacheVariables" : { + "CMAKE_BUILD_TYPE" : "Debug" + } + } + ], + "buildPresets" : [ + { + "name" : "release", + "configurePreset" : "release", + "nativeToolOptions" : [ + "-j1" + ], + "targets" : [ + "ctbench-graph-all" + ] + }, + { + "name" : "debug", + "inherits" : "release", + "configurePreset" : "debug" + } + ] +} diff --git a/example/compare-all.json b/example/compare-all.json new file mode 100644 index 0000000..38e088c --- /dev/null +++ b/example/compare-all.json @@ -0,0 +1,15 @@ +{ + "plotter": "compare_by", + "legend_title": "Timings", + "x_label": "Benchmark size factor", + "y_label": "Time (µs)", + "draw_average": true, + "demangle": false, + "draw_points": false, + "width": 800, + "height": 400, + "key_ptrs": ["/name", "/args/detail"], + "_key_ptrs": ["/name"], + "value_ptr": "/dur", + "plot_file_extensions": [".svg"] +} diff --git a/example/variadic_sum/expansion.cpp b/example/variadic_sum/expansion.cpp new file mode 100644 index 0000000..338e494 --- /dev/null +++ b/example/variadic_sum/expansion.cpp @@ -0,0 +1,16 @@ +#include + +/// Compile-time unsigned int +template struct ct_uint_t { static constexpr unsigned value = N; }; + +/// Expansion compile-time sum implementation +template auto sum(Ts const &...) 
{ return (Ts::value + ...); } + +// Driver code + +#define GEN_MACRO(Z, N, TEXT) \ + TEXT {} + +unsigned foo() { + return sum(BOOST_PP_ENUM(BENCHMARK_SIZE, GEN_MACRO, ct_uint_t)); +} diff --git a/example/variadic_sum/recursive.cpp b/example/variadic_sum/recursive.cpp new file mode 100644 index 0000000..1b571c6 --- /dev/null +++ b/example/variadic_sum/recursive.cpp @@ -0,0 +1,19 @@ +#include + +/// Compile-time unsigned int +template struct ct_uint_t { static constexpr unsigned value = N; }; + +/// Recursive compile-time sum implementation +template auto sum(T const &) { return T::value; } +template auto sum(T const &, Ts const &...tl) { + return T::value + sum(tl...); +} + +// Driver code + +#define GEN_MACRO(Z, N, TEXT) \ + TEXT {} + +unsigned foo() { + return sum(BOOST_PP_ENUM(BENCHMARK_SIZE, GEN_MACRO, ct_uint_t)); +} From 799f290b8490d3b70de55e60463caa4edbd94f6a Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Mon, 16 Jan 2023 13:10:35 +0100 Subject: [PATCH 29/43] Added reference to time-trace pull request --- paper.bib | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/paper.bib b/paper.bib index 1480a4a..fe83360 100644 --- a/paper.bib +++ b/paper.bib @@ -96,6 +96,14 @@ @online{ctbench-cppp21 url = {https://www.youtube.com/watch?v=1RZY6skM0Rc}, } +@online{time-trace, + author = {Anton Afanasyev}, + title = {Adds `-ftime-trace` option to clang that produces Chrome + `chrome://tracing` compatible JSON profiling output dumps}, + year = {2019}, + url = {https://reviews.llvm.org/D58675}, +} + @online{meetingcpp22, author = {Paul {Keir}, Joel {Falcou}, Jules {Penuchot}, Andrew {Gozillon}}, title = {Meeting C++ - A totally constexpr standard library}, From ef95e120b00704b2cde4c5dcad34006265a1754a Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Mon, 16 Jan 2023 13:11:28 +0100 Subject: [PATCH 30/43] Authoring and advancement on paper draft --- paper.md | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git 
a/paper.md b/paper.md index 715083b..8eeac35 100644 --- a/paper.md +++ b/paper.md @@ -11,8 +11,12 @@ authors: orcid: 0000-0002-6377-6880 equal-contrib: true affiliation: 1 + - name: Joel Falcou + orcid: 0000-0001-5380-7375 + equal-contrib: true + affiliation: 1 affiliations: - - name: Jules Penuchot, LISN, Paris-Saclay University, France + - name: Université Paris-Saclay, CNRS, Laboratoire Interdisciplinaire des Sciences du Numérique, 91400, Orsay, France index: 1 date: 07 December 2023 bibliography: paper.bib @@ -63,9 +67,9 @@ methodologies: - Templight[@templight] adds Clang template instantiation inspection capabilities with debugging and profiling tools. -Additionally, Clang has a built-in profiler that provides in-depth time -measurements of various compilation steps, which can be enabled by passing the -`-ftime-trace` flag. Its output contains data that can be directly linked to +Additionally, Clang has a built-in profiler[@time-trace] that provides in-depth +time measurements of various compilation steps, which can be enabled by passing +the `-ftime-trace` flag. Its output contains data that can be directly linked to symbols in the source code, making it easier to study the impact of specific symbols on various stages of compilation. The output format is a JSON file meant to be compatible with Chrome's flame graph visualizer, that contains a series of @@ -112,16 +116,17 @@ developer friendliness, project integration, and component reusability. through JSON configuration files that are well documented. Default configuration files can be generated using a dedicated CLI tool. -Even though **ctbench** was made to analyze Clang's time-trace events, it can -also measure compiler execution time and report it in a synthetic time-trace +Despite the fact that **ctbench** was made to handle Clang's time-trace events, +it can also measure compiler execution time and report it in a synthetic time-trace file, making it partially compatible with GCC as well. 
-Meme si la premiere fonctionnalite de ctbench c'est de faire du time-trace, on -fournit aussi des outils permettant de faire de la comparaison entre differents -compilateurs (ou diffrentes versions d'un compilo). GCC ne supportant pas -time-trace, ctbench permet egalement de mesurer le temps d'execution du -compilateur et genere un fichier au format time-trace pour assurer -l'interoperabilite avec les outils d'aggregation de donnees etc... +In addition to **ctbench**'s time-trace handling, it has a compatibility mode +for compilers that do not support it like GCC. This mode works by measuring +compiler execution time just like Metabench and generating a time-trace file +that contains compiler execution time. Moreover, the tooling makes defining +compilers per-target possible within a CMake project, allowing black-box +compiler performance comparisons between GCC and Clang for example or +comparisons between different versions of a compiler. All these features make **ctbench** a very complete toolkit for compile-time benchmarking, making comprehensive benchmark quick and easy, and the only From b845af88956a9486011748e1fc9fa14af9a0f80f Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Mon, 16 Jan 2023 17:11:07 +0100 Subject: [PATCH 31/43] Fixes and improvements for the benchmarks --- example/variadic_sum/expansion.cpp | 8 ++++++-- example/variadic_sum/recursive.cpp | 9 ++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/example/variadic_sum/expansion.cpp b/example/variadic_sum/expansion.cpp index 338e494..227c28a 100644 --- a/example/variadic_sum/expansion.cpp +++ b/example/variadic_sum/expansion.cpp @@ -4,13 +4,17 @@ template struct ct_uint_t { static constexpr unsigned value = N; }; /// Expansion compile-time sum implementation -template auto sum(Ts const &...) { return (Ts::value + ...); } +template constexpr unsigned sum(Ts const &...) 
{ + return (Ts::value + ...); +} // Driver code #define GEN_MACRO(Z, N, TEXT) \ TEXT {} -unsigned foo() { +constexpr unsigned foo() { return sum(BOOST_PP_ENUM(BENCHMARK_SIZE, GEN_MACRO, ct_uint_t)); } + +[[maybe_unused]] constexpr unsigned result = foo(); diff --git a/example/variadic_sum/recursive.cpp b/example/variadic_sum/recursive.cpp index 1b571c6..84ca336 100644 --- a/example/variadic_sum/recursive.cpp +++ b/example/variadic_sum/recursive.cpp @@ -4,8 +4,9 @@ template struct ct_uint_t { static constexpr unsigned value = N; }; /// Recursive compile-time sum implementation -template auto sum(T const &) { return T::value; } -template auto sum(T const &, Ts const &...tl) { +template constexpr auto sum(T const &) { return T::value; } +template +constexpr auto sum(T const &, Ts const &...tl) { return T::value + sum(tl...); } @@ -14,6 +15,8 @@ template auto sum(T const &, Ts const &...tl) { #define GEN_MACRO(Z, N, TEXT) \ TEXT {} -unsigned foo() { +constexpr unsigned foo() { return sum(BOOST_PP_ENUM(BENCHMARK_SIZE, GEN_MACRO, ct_uint_t)); } + +[[maybe_unused]] constexpr unsigned result = foo(); From 84fd959072e43ba0d4a390131a6faa15b01412b5 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Mon, 16 Jan 2023 17:11:47 +0100 Subject: [PATCH 32/43] Increased repetition number and max benchmark size --- example/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 454140e..54d0ed9 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -32,9 +32,9 @@ if(ENABLE_TIME_TRACE) endif() set(BENCHMARK_START 1 CACHE STRING "Benchmark size minimum") -set(BENCHMARK_STOP 16 CACHE STRING "Benchmark size maximum") +set(BENCHMARK_STOP 64 CACHE STRING "Benchmark size maximum") set(BENCHMARK_STEP 1 CACHE STRING "Benchmark size step") -set(BENCHMARK_ITERATIONS 1 CACHE STRING "Number of samples per size") +set(BENCHMARK_ITERATIONS 10 CACHE STRING "Number of samples per size") 
ctbench_add_benchmark( variadic_sum.expansion variadic_sum/expansion.cpp ${BENCHMARK_START} From 7fc38ff1f272d140144013c9bbdb930569c2ccd6 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Mon, 16 Jan 2023 17:12:18 +0100 Subject: [PATCH 33/43] Added simple benchmark example to the paper --- paper.md | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/paper.md b/paper.md index 8eeac35..5bfc38d 100644 --- a/paper.md +++ b/paper.md @@ -135,7 +135,108 @@ analysis. # Practical examples -rajouter un exemple simple avec variadiques recursifs vs parameter pack (avec le +This section will cover a short yet practical example of ctbench usage. We want +to calculate the sum of a series of integers known at compile-time, using a type +template to store unsigned integer values at compile-time. We need to include +`boost/preprocessor/repetition/enum.hpp` to scale the benchmark case as well: + +```cpp +#include + +/// Compile-time unsigned int +template struct ct_uint_t { static constexpr unsigned value = N; }; +``` + +The first version of the metaprogram is an implementation based on a recursive +template function: + +```cpp +/// Recursive compile-time sum implementation +template constexpr auto sum(T const &) { return T::value; } +template +constexpr auto sum(T const &, Ts const &...tl) { + return T::value + sum(tl...); +} +``` + +And the other version relies on C++11 parameter pack expansion: + +```cpp +/// Expansion compile-time sum implementation +template constexpr unsigned sum(Ts const &...) { + return (Ts::value + ...); +} +``` + +Both versions share the same interface, and thus the same driver code as well. 
+The driver code takes care of scaling the benchmark according to +`BENCHMARK_SIZE`, which is defined by **ctbench** through the CMake API: + +```cpp +// Driver code + +#define GEN_MACRO(Z, N, TEXT) \ + TEXT {} + +constexpr unsigned foo() { + return sum(BOOST_PP_ENUM(BENCHMARK_SIZE, GEN_MACRO, ct_uint_t)); +} + +[[maybe_unused]] constexpr unsigned result = foo(); +``` + +The CMake code needed to run the benchmarks is the following: + +```cmake +ctbench_add_benchmark( + variadic_sum.expansion variadic_sum/expansion.cpp ${BENCHMARK_START} + ${BENCHMARK_STOP} ${BENCHMARK_STEP} ${BENCHMARK_ITERATIONS}) + +ctbench_add_benchmark( + variadic_sum.recursive variadic_sum/recursive.cpp ${BENCHMARK_START} + ${BENCHMARK_STOP} ${BENCHMARK_STEP} ${BENCHMARK_ITERATIONS}) +``` + +Then a graph can be declared: + +```cmake +ctbench_add_graph(variadic_sum-compare-graph compare-all.json + variadic_sum.expansion variadic_sum.recursive) +``` + +with `compare-all.json` containing the following: + +```json +{ + "plotter": "compare_by", + "legend_title": "Timings", + "x_label": "Benchmark size factor", + "y_label": "Time (µs)", + "draw_average": true, + "demangle": false, + "draw_points": false, + "width": 800, + "height": 400, + "key_ptrs": ["/name", "/args/detail"], + "_key_ptrs": ["/name"], + "value_ptr": "/dur", + "plot_file_extensions": [".svg"] +} +``` + +This configuration file uses the `compare_by` plotter to generate one plot for +each pair of elements designated by the JSON pointers in `key_ptrs`, namely +`/name` and `/args/detail`. The first pointer designates an LLVM timer for a +particular section of code, and the second *may* refer to a C++ symbol, or a +C++ source filename. + +The result is a series of graphs, each one designating a particular timer event, +specific to a source or a symbol whenever it is possible (ie. whenever +additional data is present in the `/args/detail` value of a timer event). 
Each +graph compares the evolution of these timer events in function of the +instanciation size of the benchmark cases. + + # Statement of interest From 282ae7ad11e8d1290948b3eaeb81899287ce1bb6 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Tue, 17 Jan 2023 12:34:09 +0100 Subject: [PATCH 34/43] Updated graphs with provided example --- docs/images/Backend.svg | 233 ---------------- docs/images/ExecuteCompiler.svg | 216 ++++++--------- docs/images/Frontend.svg | 262 ------------------ docs/images/InstantiateFunction/foovoid.svg | 219 +++++++++++++++ .../run_programprogram_string.svg | 237 ---------------- docs/images/Total_Backend.svg | 234 ++++++++++++++++ docs/images/Total_Frontend.svg | 223 ++++++--------- docs/images/Total_InstantiateFunction.svg | 198 ++++++------- 8 files changed, 723 insertions(+), 1099 deletions(-) delete mode 100644 docs/images/Backend.svg delete mode 100644 docs/images/Frontend.svg create mode 100644 docs/images/InstantiateFunction/foovoid.svg delete mode 100644 docs/images/InstantiateFunction/run_programprogram_string.svg create mode 100644 docs/images/Total_Backend.svg diff --git a/docs/images/Backend.svg b/docs/images/Backend.svg deleted file mode 100644 index cb62bb3..0000000 --- a/docs/images/Backend.svg +++ /dev/null @@ -1,233 +0,0 @@ - - - -Gnuplot -Produced by GNUPLOT 5.4 patchlevel 5 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Time (µs) - - - - - Benchmark size factor - - - - - bfbench-consecutiveloops-et-clang-tt average - - - - bfbench-consecutiveloops-flat-clang-tt average - - - - - - - Timings - - - - - bfbench-consecutiveloops-et-clang-tt average - - - bfbench-consecutiveloops-et-clang-tt average - - - - - - bfbench-consecutiveloops-flat-clang-tt average - - - bfbench-consecutiveloops-flat-clang-tt average - - - - - - - - 110000 - - - - - 120000 - - - - - 130000 - - - - - 140000 - - - - - 150000 - - - - - 160000 - - - - - 170000 - - - - - 180000 - - - - - 1 - - - - - 2 - - - - - 3 - 
- - - - 4 - - - - - 5 - - - - - 6 - - - - - 7 - - - - - 8 - - - - - 9 - - - - - 10 - - - - - - - - - - - - - diff --git a/docs/images/ExecuteCompiler.svg b/docs/images/ExecuteCompiler.svg index 3551f48..945430a 100644 --- a/docs/images/ExecuteCompiler.svg +++ b/docs/images/ExecuteCompiler.svg @@ -44,202 +44,166 @@ - - - + + + Time (µs) - + Benchmark size factor - bfbench-consecutiveloops-et-clang-tt average + variadicsum.expansion average - + - bfbench-consecutiveloops-flat-clang-tt average + variadicsum.recursive average - + - - + + Timings - bfbench-consecutiveloops-et-clang-tt average + variadicsum.expansion average - - bfbench-consecutiveloops-et-clang-tt average + + variadicsum.expansion average - + - bfbench-consecutiveloops-flat-clang-tt average + variadicsum.recursive average - - bfbench-consecutiveloops-flat-clang-tt average + + variadicsum.recursive average - + - - 1.2×106 + + 60000 - - 1.3×106 + + 65000 - - 1.4×106 + + 70000 - - 1.5×106 + + 75000 - - 1.6×106 + + 80000 - - 1.7×106 + + 85000 - - 1.8×106 + + 0 - - 1.9×106 - - - - - 2×106 - - - - - 2.1×106 - - - - - 2.2×106 - - - - - 1 - - - - - 2 - - - - - 3 - - - - - 4 + + 10 - - 5 + + 20 - - 6 + + 30 - - 7 + + 40 - - 8 + + 50 - - 9 + + 60 - - 10 + + 70 @@ -247,7 +211,7 @@ - + diff --git a/docs/images/Frontend.svg b/docs/images/Frontend.svg deleted file mode 100644 index c4d3482..0000000 --- a/docs/images/Frontend.svg +++ /dev/null @@ -1,262 +0,0 @@ - - - -Gnuplot -Produced by GNUPLOT 5.4 patchlevel 5 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Time (µs) - - - - - Benchmark size factor - - - - - bfbench-consecutiveloops-et-clang-tt average - - - - bfbench-consecutiveloops-flat-clang-tt average - - - - - - - Timings - - - - - bfbench-consecutiveloops-et-clang-tt average - - - bfbench-consecutiveloops-et-clang-tt average - - - - - - bfbench-consecutiveloops-flat-clang-tt average - - - bfbench-consecutiveloops-flat-clang-tt average - - - - - - - - 500000 - - - - - 
550000 - - - - - 600000 - - - - - 650000 - - - - - 700000 - - - - - 750000 - - - - - 800000 - - - - - 850000 - - - - - 900000 - - - - - 950000 - - - - - 1×106 - - - - - 1.05×106 - - - - - 1 - - - - - 2 - - - - - 3 - - - - - 4 - - - - - 5 - - - - - 6 - - - - - 7 - - - - - 8 - - - - - 9 - - - - - 10 - - - - - - - - - - - - - diff --git a/docs/images/InstantiateFunction/foovoid.svg b/docs/images/InstantiateFunction/foovoid.svg new file mode 100644 index 0000000..4ab0e3e --- /dev/null +++ b/docs/images/InstantiateFunction/foovoid.svg @@ -0,0 +1,219 @@ + + + +Gnuplot +Produced by GNUPLOT 5.4 patchlevel 5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Time (µs) + + + + + Benchmark size factor + + + + + variadicsum.expansion average + + + + variadicsum.recursive average + + + + + + + Timings + + + + + variadicsum.expansion average + + + variadicsum.expansion average + + + + + + variadicsum.recursive average + + + variadicsum.recursive average + + + + + + + + 0 + + + + + 5000 + + + + + 10000 + + + + + 15000 + + + + + 20000 + + + + + 25000 + + + + + 0 + + + + + 10 + + + + + 20 + + + + + 30 + + + + + 40 + + + + + 50 + + + + + 60 + + + + + 70 + + + + + + + + + + + + + diff --git a/docs/images/InstantiateFunction/run_programprogram_string.svg b/docs/images/InstantiateFunction/run_programprogram_string.svg deleted file mode 100644 index 169a08e..0000000 --- a/docs/images/InstantiateFunction/run_programprogram_string.svg +++ /dev/null @@ -1,237 +0,0 @@ - - - -Gnuplot -Produced by GNUPLOT 5.4 patchlevel 5 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Time (µs) - - - - - Benchmark size factor - - - - - bfbench-consecutiveloops-et-clang-tt average - - - - bfbench-consecutiveloops-flat-clang-tt average - - - - - - - Timings - - - - - bfbench-consecutiveloops-et-clang-tt average - - - bfbench-consecutiveloops-et-clang-tt average - - - - - - bfbench-consecutiveloops-flat-clang-tt average - - - 
bfbench-consecutiveloops-flat-clang-tt average - - - - - - - - 0 - - - - - 20000 - - - - - 40000 - - - - - 60000 - - - - - 80000 - - - - - 100000 - - - - - 120000 - - - - - 140000 - - - - - 160000 - - - - - 180000 - - - - - 200000 - - - - - 1 - - - - - 2 - - - - - 3 - - - - - 4 - - - - - 5 - - - - - 6 - - - - - 7 - - - - - 8 - - - - - 9 - - - - - 10 - - - - - - - - - - - - - diff --git a/docs/images/Total_Backend.svg b/docs/images/Total_Backend.svg new file mode 100644 index 0000000..86f929f --- /dev/null +++ b/docs/images/Total_Backend.svg @@ -0,0 +1,234 @@ + + + +Gnuplot +Produced by GNUPLOT 5.4 patchlevel 5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Time (µs) + + + + + Benchmark size factor + + + + + variadicsum.expansion average + + + + variadicsum.recursive average + + + + + + + Timings + + + + + variadicsum.expansion average + + + variadicsum.expansion average + + + + + + variadicsum.recursive average + + + variadicsum.recursive average + + + + + + + + 1500 + + + + + 1550 + + + + + 1600 + + + + + 1650 + + + + + 1700 + + + + + 1750 + + + + + 1800 + + + + + 1850 + + + + + 0 + + + + + 10 + + + + + 20 + + + + + 30 + + + + + 40 + + + + + 50 + + + + + 60 + + + + + 70 + + + + + + + + + + + + + diff --git a/docs/images/Total_Frontend.svg b/docs/images/Total_Frontend.svg index 26742ae..7f186f2 100644 --- a/docs/images/Total_Frontend.svg +++ b/docs/images/Total_Frontend.svg @@ -44,209 +44,166 @@ - - - + + + Time (µs) - + Benchmark size factor - bfbench-consecutiveloops-et-clang-tt average + variadicsum.expansion average - + - bfbench-consecutiveloops-flat-clang-tt average + variadicsum.recursive average - + - - + + Timings - bfbench-consecutiveloops-et-clang-tt average - - - bfbench-consecutiveloops-et-clang-tt average - - - - - - bfbench-consecutiveloops-flat-clang-tt average - - - bfbench-consecutiveloops-flat-clang-tt average - - - - - + variadicsum.expansion average - - 1×106 + + variadicsum.expansion average - - 1.1×106 + - 
+ variadicsum.recursive average - - 1.2×106 + + variadicsum.recursive average - - 1.3×106 + - - - 1.4×106 + + 55000 - - 1.5×106 + + 60000 - - 1.6×106 + + 65000 - - 1.7×106 + + 70000 - - 1.8×106 + + 75000 - - 1.9×106 + + 80000 - - 2×106 + + 0 - - 2.1×106 - - - - - 1 - - - - - 2 - - - - - 3 - - - - - 4 + + 10 - - 5 + + 20 - - 6 + + 30 - - 7 + + 40 - - 8 + + 50 - - 9 + + 60 - - 10 + + 70 @@ -254,7 +211,7 @@ - + diff --git a/docs/images/Total_InstantiateFunction.svg b/docs/images/Total_InstantiateFunction.svg index b4bcd1b..78c778a 100644 --- a/docs/images/Total_InstantiateFunction.svg +++ b/docs/images/Total_InstantiateFunction.svg @@ -44,184 +44,166 @@ - - - + + + Time (µs) - + Benchmark size factor - bfbench-consecutiveloops-et-clang-tt average + variadicsum.expansion average - + - bfbench-consecutiveloops-flat-clang-tt average + variadicsum.recursive average - + - - + + Timings - bfbench-consecutiveloops-et-clang-tt average + variadicsum.expansion average - - bfbench-consecutiveloops-et-clang-tt average + + variadicsum.expansion average - + - bfbench-consecutiveloops-flat-clang-tt average + variadicsum.recursive average - - bfbench-consecutiveloops-flat-clang-tt average + + variadicsum.recursive average - + - - 100000 + + 0 - - 120000 + + 5000 - - 140000 + + 10000 - - 160000 + + 15000 - - 180000 + + 20000 - - 200000 + + 25000 - - 220000 + + 0 - - 240000 - - - - - 260000 - - - - - 280000 - - - - - 300000 - - - - - 1 - - - - - 2 - - - - - 3 - - - - - 4 + + 10 - - 5 + + 20 - - 6 + + 30 - - 7 + + 40 - - 8 + + 50 - - 9 + + 60 - - 10 + + 70 @@ -229,7 +211,7 @@ - + From a6e7b69b112ec566e9c19519298e10e72bc18b69 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Tue, 17 Jan 2023 12:35:04 +0100 Subject: [PATCH 35/43] Added size zero for baseline --- example/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 54d0ed9..a85d028 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ 
-31,7 +31,7 @@ if(ENABLE_TIME_TRACE) -fconstexpr-steps=2147483647 -fbracket-depth=2147483647) endif() -set(BENCHMARK_START 1 CACHE STRING "Benchmark size minimum") +set(BENCHMARK_START 0 CACHE STRING "Benchmark size minimum") set(BENCHMARK_STOP 64 CACHE STRING "Benchmark size maximum") set(BENCHMARK_STEP 1 CACHE STRING "Benchmark size step") set(BENCHMARK_ITERATIONS 10 CACHE STRING "Number of samples per size") From 0525fc4cfb3061393fe44bdb4214eea97704ee51 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Tue, 17 Jan 2023 12:35:23 +0100 Subject: [PATCH 36/43] Config cleanup --- example/compare-all.json | 1 - 1 file changed, 1 deletion(-) diff --git a/example/compare-all.json b/example/compare-all.json index 38e088c..7cb5bd3 100644 --- a/example/compare-all.json +++ b/example/compare-all.json @@ -9,7 +9,6 @@ "width": 800, "height": 400, "key_ptrs": ["/name", "/args/detail"], - "_key_ptrs": ["/name"], "value_ptr": "/dur", "plot_file_extensions": [".svg"] } From 2c3ad995544d788bea6be7f8c07a45ce28abeb7d Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Tue, 17 Jan 2023 12:36:10 +0100 Subject: [PATCH 37/43] Support for benchmark size 0, turned benchmark driver into a template function to make its instantiation measurable --- example/variadic_sum/expansion.cpp | 29 ++++++++++++++++++----------- example/variadic_sum/recursive.cpp | 29 ++++++++++++++++++----------- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/example/variadic_sum/expansion.cpp b/example/variadic_sum/expansion.cpp index 227c28a..a67b953 100644 --- a/example/variadic_sum/expansion.cpp +++ b/example/variadic_sum/expansion.cpp @@ -1,20 +1,27 @@ -#include +#include -/// Compile-time unsigned int -template struct ct_uint_t { static constexpr unsigned value = N; }; +/// Compile-time std::size_t +template struct ct_uint_t { + static constexpr std::size_t value = N; +}; /// Expansion compile-time sum implementation -template constexpr unsigned sum(Ts const &...) 
{ - return (Ts::value + ...); +template constexpr auto sum(); + +template <> constexpr auto sum() { return ct_uint_t<0>{}; } + +template constexpr auto sum(Ts const &...) { + return ct_uint_t<(Ts::value + ... + 0)>{}; } // Driver code -#define GEN_MACRO(Z, N, TEXT) \ - TEXT {} - -constexpr unsigned foo() { - return sum(BOOST_PP_ENUM(BENCHMARK_SIZE, GEN_MACRO, ct_uint_t)); +template constexpr auto foo() { + return [](std::index_sequence) { + return sum(ct_uint_t{}...); + } + (std::make_index_sequence{}); } -[[maybe_unused]] constexpr unsigned result = foo(); +[[maybe_unused]] constexpr std::size_t result = + decltype(foo())::value; diff --git a/example/variadic_sum/recursive.cpp b/example/variadic_sum/recursive.cpp index 84ca336..5fea438 100644 --- a/example/variadic_sum/recursive.cpp +++ b/example/variadic_sum/recursive.cpp @@ -1,22 +1,29 @@ -#include +#include -/// Compile-time unsigned int -template struct ct_uint_t { static constexpr unsigned value = N; }; +/// Compile-time std::size_t +template struct ct_uint_t { + static constexpr std::size_t value = N; +}; /// Recursive compile-time sum implementation -template constexpr auto sum(T const &) { return T::value; } +template constexpr auto sum(); + +template <> constexpr auto sum() { return ct_uint_t<0>{}; } +template constexpr auto sum(T const &) { return T{}; } + template constexpr auto sum(T const &, Ts const &...tl) { - return T::value + sum(tl...); + return ct_uint_t{}; } // Driver code -#define GEN_MACRO(Z, N, TEXT) \ - TEXT {} - -constexpr unsigned foo() { - return sum(BOOST_PP_ENUM(BENCHMARK_SIZE, GEN_MACRO, ct_uint_t)); +template constexpr auto foo() { + return [](std::index_sequence) { + return sum(ct_uint_t{}...); + } + (std::make_index_sequence{}); } -[[maybe_unused]] constexpr unsigned result = foo(); +[[maybe_unused]] constexpr std::size_t result = + decltype(foo())::value; From d7b224c5e2f4c5d88213da481d4c9d2caec1a040 Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Tue, 17 Jan 2023 12:39:49 
+0100 Subject: [PATCH 38/43] Updated draft with benchmark description and graphs --- paper.md | 60 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/paper.md b/paper.md index 5bfc38d..a69d249 100644 --- a/paper.md +++ b/paper.md @@ -137,34 +137,50 @@ analysis. This section will cover a short yet practical example of ctbench usage. We want to calculate the sum of a series of integers known at compile-time, using a type -template to store unsigned integer values at compile-time. We need to include -`boost/preprocessor/repetition/enum.hpp` to scale the benchmark case as well: +template to store unsigned integer values at compile-time. + +We will be comparing the compile-time performance of two implementations: +- one based on a recursive function template, +- and one based on C++11 parameter pack expansion. + +First we need to include `utility` to instantiate our benchmark according to the +size parameter with `std::make_index_sequence`, and define the compile-time +container type for an unsigned integer: ```cpp -#include +#include -/// Compile-time unsigned int -template struct ct_uint_t { static constexpr unsigned value = N; }; +/// Compile-time std::size_t +template struct ct_uint_t { + static constexpr std::size_t value = N; +}; ``` -The first version of the metaprogram is an implementation based on a recursive -template function: +The first version of the metaprogram based on a recursive template function: ```cpp /// Recursive compile-time sum implementation -template constexpr auto sum(T const &) { return T::value; } +template constexpr auto sum(); + +template <> constexpr auto sum() { return ct_uint_t<0>{}; } +template constexpr auto sum(T const &) { return T{}; } + template constexpr auto sum(T const &, Ts const &...tl) { - return T::value + sum(tl...); + return ct_uint_t{}; } ``` -And the other version relies on C++11 parameter pack expansion: +And the other version relying on C++11 
parameter pack expansion: ```cpp /// Expansion compile-time sum implementation -template constexpr unsigned sum(Ts const &...) { - return (Ts::value + ...); +template constexpr auto sum(); + +template <> constexpr auto sum() { return ct_uint_t<0>{}; } + +template constexpr auto sum(Ts const &...) { + return ct_uint_t<(Ts::value + ... + 0)>{}; } ``` @@ -175,14 +191,15 @@ The driver code takes care of scaling the benchmark according to ```cpp // Driver code -#define GEN_MACRO(Z, N, TEXT) \ - TEXT {} - -constexpr unsigned foo() { - return sum(BOOST_PP_ENUM(BENCHMARK_SIZE, GEN_MACRO, ct_uint_t)); +template constexpr auto foo() { + return [](std::index_sequence) { + return sum(ct_uint_t{}...); + } + (std::make_index_sequence{}); } -[[maybe_unused]] constexpr unsigned result = foo(); +[[maybe_unused]] constexpr std::size_t result = + decltype(foo())::value; ``` The CMake code needed to run the benchmarks is the following: @@ -218,7 +235,6 @@ with `compare-all.json` containing the following: "width": 800, "height": 400, "key_ptrs": ["/name", "/args/detail"], - "_key_ptrs": ["/name"], "value_ptr": "/dur", "plot_file_extensions": [".svg"] } @@ -236,6 +252,12 @@ additional data is present in the `/args/detail` value of a timer event). Each graph compares the evolution of these timer events in function of the instanciation size of the benchmark cases. +![docs/images/ExecuteCompiler.svg](ExecuteCompiler){width=100%} +![docs/images/Total_Frontend.svg](Total Frontend){width=100%} +![docs/images/Total_Backend.svg](Total Backend){width=100%} +![docs/images/Total_InstantiateFunction.svg](Total InstantiateFunction){width=100%} +![docs/images/InstantiateFunction/foovoid.svg](InstantiateFunction foovoid){width=100%} + +Clang's profiler data is very exhaustive and insightful, however there is no +tooling to make sense of it in the context of variable size compile-time +benchmarks. **ctbench** tries to bridge the gap by providing a tool to analyze +this valuable data. 
It also improves upon existing tools by providing a solution
+that's easy to integrate into existing CMake projects, and generates graphs in
+various formats that are trivially embeddable in documents like research papers,
+web pages, or documentations. Additionally, relying on persistent configuration,
+benchmark declaration and description files provides strong guarantees for
+benchmark reproducibility, as opposed to web tools or interactive profilers.

# Functionality

@@ -117,8 +122,8 @@ developer friendliness, project integration, and component reusability.
   configuration files can be generated using a dedicated CLI tool.

Despite the fact that **ctbench** was made to handle Clang's time-trace events,
-it can also measure compiler execution time and report it in a synthetic time-trace
-file, making it partially compatible with GCC as well.
+it can also measure compiler execution time and report it in a synthetic
+time-trace file, making it partially compatible with GCC as well.

In addition to **ctbench**'s time-trace handling, it has a compatibility mode
for compilers that do not support it like GCC. This mode works by measuring
@@ -254,54 +259,28 @@ instanciation size of the benchmark cases.

![ExecuteCompiler](docs/images/ExecuteCompiler.svg){width=100%}

+The first timer we want to look at is ExecuteCompiler, which is the total
+compilation time. This is by far the more important metric as it is the most
+comprehensive one, and can be interpreted
+
![Total Frontend](docs/images/Total_Frontend.svg){width=100%}

![Total Backend](docs/images/Total_Backend.svg){width=100%}

-![Total InstantiateFunction](docs/images/Total_InstantiateFunction.svg){width=100%}
-
-![InstantiateFunction foovoid](docs/images/InstantiateFunction/foovoid.svg){width=100%}
-
-
-And within this class of timers, we can segregate functions.
Here, we're looking
-at the time for the InstantiateFunction event specific to the run_program
-function, which is the driver function for both benchmark cases:

+![InstantiateFunction foovoid](docs/images/InstantiateFunction/foovoid.svg){width=100%}

-![run_programprogram_string time curves](docs/images/InstantiateFunction/run_programprogram_string.svg){width=100%}

+And finally **ctbench** allows us to focus on symbol-specific events, such as
+InstantiateFunction for the foovoid symbol (ie. the benchmark driver function).

-However these graphs must not be interpreted alone. It is important to look at
-the hierarchy of Clang's timer events using flame graph visualizers as events
-might overlap each other. Also note that the hierarchy of events can vary from a
-benchmark case to another within a same benchmark category.-->

+- Poacher: https://github.com/jpenuchot/poacher
+- Rule of Cheese: https://github.com/jpenuchot/rule-of-cheese

# Statement of interest

@@ -310,26 +289,6 @@ is the main C++ technical conference in France.

It is being used to benchmark
examples from the poacher[@poacher] project, which was briefly presented at the
Meeting C++ 2022[@meetingcpp22] technical conference.

-
-
-
-
+The Total Instantiate function timer is an interesting one as it explicitly
+targets function instantiation time. Note that timers that are prefixed with
+"Total" measure the total time spent in a timer section, regardless of the
+specific symbol or source associated to its individual timer events.

![InstantiateFunction foovoid](docs/images/InstantiateFunction/foovoid.svg){width=100%}

-And finally **ctbench** allows us to focus on symbol-specific events, such as
-InstantiateFunction for the foovoid symbol (ie. the benchmark driver function).

+Finally, we can take a look at `InstantiateFunction/foovoid.svg` which measures
+the InstantiateFunction event time specifically for `foo()`, which is our
+driver template function.
Using Perfetto UI to look at the timer event +hierarchy, we can validate that the timer event for this specific symbol +includes the InstantiateFunction time for all the symbols that may be +instantiated within this function. -- Poacher: https://github.com/jpenuchot/poacher -- Rule of Cheese: https://github.com/jpenuchot/rule-of-cheese +This level of detail and granularity in the analysis of compile-time benchmarks +was never reached before, and may help us set good practices to improve the +compile-time performance of metaprograms. # Statement of interest @@ -289,22 +305,15 @@ is the main C++ technical conference in France. It is being used to benchmark examples from the poacher[@poacher] project, which was briefly presented at the Meeting C++ 2022[@meetingcpp22] technical conference. - +- [Rule of Cheese](https://github.com/jpenuchot/rule-of-cheese): A collection of + compile-time microbenchmarks to help set better C++ metaprogramming guidelines + to improve compile-time performance # Acknowledgements From 430189e40398baf102d8faffe4a01f8535a1f25e Mon Sep 17 00:00:00 2001 From: Jules P?nuchot Date: Wed, 18 Jan 2023 19:54:32 +0100 Subject: [PATCH 42/43] Acknowledgements + indent --- paper.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/paper.md b/paper.md index 0ef9841..af9f813 100644 --- a/paper.md +++ b/paper.md @@ -300,10 +300,10 @@ compile-time performance of metaprograms. # Statement of interest -**ctbench** was first presented at the CPPP 2021 conference[@ctbench-cppp21] which -is the main C++ technical conference in France. It is being used to benchmark -examples from the poacher[@poacher] project, which was briefly presented at the -Meeting C++ 2022[@meetingcpp22] technical conference. +**ctbench** was first presented at the CPPP 2021 conference[@ctbench-cppp21] +which is the main C++ technical conference in France. 
It is being used to +benchmark examples from the poacher[@poacher] project, which was briefly +presented at the Meeting C++ 2022[@meetingcpp22] technical conference. # Related projects @@ -317,6 +317,7 @@ Meeting C++ 2022[@meetingcpp22] technical conference. # Acknowledgements -We acknowledge contributions from Philippe Virouleau +We acknowledge contributions from Philippe Virouleau and Paul Keir for their +insightful suggestions. # References From 99bbee4c30620d8d5a7532a4efdff7f5b347dcb6 Mon Sep 17 00:00:00 2001 From: Joel Falcou Date: Thu, 26 Jan 2023 14:44:56 +0100 Subject: [PATCH 43/43] Joel's remarks --- paper.md | 50 ++++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/paper.md b/paper.md index af9f813..83cebb3 100644 --- a/paper.md +++ b/paper.md @@ -2,7 +2,7 @@ title: 'ctbench - compile-time benchmarking and analysis' tags: - C++ - - metaprogramming + - meta-programming - compilation - benchmarking - library @@ -26,20 +26,20 @@ bibliography: paper.bib # Summary -With metaprogrammed libraries like Eigen[@eigen], Blaze[@blazelib], or -CTRE[@ctre] being developed, we're seeing increasing computing needs at compile +With libraries like Eigen[@eigen], Blaze[@blazelib], or CTRE[@ctre] being developed with +a large tempalte meta-programmed implementation, we're seeing increasing computing needs at compile time. These needs might grow even larger as C++ embeds more features over time to support and extend this kind of practices, like compile-time containers[@more-constexpr-containers] or static reflection[@static-reflection]. -However there is still no clear cut methodology to compare the performance -impact of different metaprogramming strategies. Moreover, new C++ features might -allow for new techniques that could offer better compile-time performance. -However, these claims still remain to be proven with proper methodology. 
+However, there is still no clear cut methodology to compare the performance
+impact of different meta-programming strategies. But, as new C++ features
+allow for new techniques with claimed better compile-time performance,
+no proper methodology is provided to back up those claims.
 
 This paper introduces **ctbench**, which is a set of tools for compile-time
 benchmarking and analysis in C++. It aims to provide developer-friendly tools to
 declare and run benchmarks, then aggregate, filter out, and plot the data to
-analyze it. As such, **ctbench** is meant to become the first layer for proper
+analyze it. As such, **ctbench** is meant to become the first layer for a proper
 scientific methodology for analyzing compile-time program behavior.
 
@@ -49,14 +49,14 @@ benchmarking and establish the limits of what these tools can do.
 
 # Statement of need
 
-C++ template metaprogramming raised interest for allowing computing libraries to
+C++ template meta-programming raised interest for allowing computing libraries to
 offer great performance with a very high level of abstraction. As a tradeoff for
 interpreting representations of calculations at runtime, they are represented at
 compile-time, and transformed directly into their own programs.
 
-As metaprogramming became easier with C++11 and C++17, it became more mainstream
+As meta-programming became easier with C++11 and C++17, it became more mainstream
 and consequently, developers have to bear with longer compilation times without
-being able to explain them. Therefore being able to measure compilation times is
+being able to explain them. Therefore, being able to measure compilation times is
A first generation of tools aims to tackle this issue with their own specific methodologies: @@ -65,7 +65,7 @@ methodologies: A-B compile-time comparisons in a web browser, - Metabench[@metabench] instantiates variably sized benchmarks using embedded Ruby (ERB) templating and plots compiler execution time, allowing scaling - analyses of metaprograms, + analyses of meta-programs, - Templight[@templight] adds Clang template instantiation inspection capabilities with debugging and profiling tools. @@ -75,20 +75,18 @@ the `-ftime-trace` flag. Its output contains data that can be directly linked to symbols in the source code, making it easier to study the impact of specific symbols on various stages of compilation. The output format is a JSON file meant to be compatible with Chrome's flame graph visualizer, that contains a series of -time events with optional metadata like the (mangled) C++ symbol or the file +time events with optional metadata like the mangled C++ symbol or the file related to an event. The profiling data can then be visualized using tools such as Google's [Perfetto UI](https://ui.perfetto.dev/). -![Perfetto UI displaying a Clang time trace file for Poacher's consecutive loops -benchmark case with the expression template backend]( -docs/images/perfetto-ui.png) +![Perfetto UI displaying a sample Clang time trace file](docs/images/perfetto-ui.png) Clang's profiler data is very exhaustive and insightful, however there is no tooling to make sense of it in the context of variable size compile-time benchmarks. **ctbench** tries to bridge the gap by providing a tool to analyze this valuable data. It also improves upon existing tools by providing a solution that's easy to integrate into existing CMake projects, and generates graphs in -various formats that are trivialy embeddable in documents like research papers, +various formats that are trivially embeddable in documents like research papers, web pages, or documentations. 
Additionally, relying on persistent configuration, benchmark declaration and description files provides strong guarantees for benchmark reproductibility, as opposed to web tools or interactive profilers. @@ -98,7 +96,7 @@ benchmark reproductibility, as opposed to web tools or interactive profilers. Originally inspired by Metabench[@metabench], **ctbench** development was driven by the need for a similar tool that allows the observation of Clang's time-trace files to help get a more comprehensive view on the impact of -metaprogramming techniques on compile times. A strong emphasis was put on +meta-programming techniques on compile times. A strong emphasis was put on developer friendliness, project integration, and component reusability. **ctbench** provides: @@ -109,14 +107,14 @@ developer friendliness, project integration, and component reusability. Although CMake is not a proper programming language, it is used as the main API for **ctbench** as most C++ developers are already familiar with it. -- a set of JSON-configurable plotters with customizable data aggretation +- a set of JSON-configurable plotters with customizable data aggregation features, which can be reused as a C++ library The core library provides data representations to handle benchmarks cases - instantited at several sizes, each instance being repeated at least once. It + instantiated at several sizes, each instance being repeated at least once. It also provides tools to aggregate, filter, and sort data from time-trace events, as well as various plotters that provide different aggregation and - vizualisation strategies. The plotters can generate files in various format + visualisation strategies. The plotters can generate files in various format thanks to the Sciplot[@sciplot] library, and they are highly configurable through JSON configuration files that are well documented. Default configuration files can be generated using a dedicated CLI tool. 
@@ -127,7 +125,7 @@ time-trace file, making it partially compatible with GCC as well. In addition to **ctbench**'s time-trace handling, it has a compatibility mode for compilers that do not support it like GCC. This mode works by measuring -compiler execution time just like Metabench and generating a time-trace file +compiler execution time just like Metabench[@metabench] and generating a time-trace file that contains compiler execution time. Moreover, the tooling makes defining compilers per-target possible within a CMake project, allowing black-box compiler performance comparisons between GCC and Clang for example or @@ -274,13 +272,13 @@ hierarchy to take a look at frontend and backend execution times. The backend is not being impacted here, supposedly because this is purely a compile-time program, and the output program is empty. However this might not be -the case for all metaprograms, and metaprograms might have different impacts on +the case for all meta-programs, and meta-programs might have different impacts on the backend as they may generate programs in different ways (ie. generate more symbols, larger symbols, more data structures, etc.). ![Total InstantiateFunction](docs/images/Total_InstantiateFunction.svg){width=100%} -The Total Instantiate function timer is an interesting one as it explicitely +The Total Instantiate function timer is an interesting one as it explicitly targets function instanciation time. Note that timers that are prefixed with "Total" measure the total time spent in a timer section, regardless of the specific symbol or source associated to its individual timer events. @@ -296,7 +294,7 @@ instantiated within this function. This level of detail and granularity in the analysis of compile-time benchmarks was never reached before, and may help us set good practices to improve the -compile-time performance of metaprograms. +compile-time performance of meta-programs. 
# Statement of interest @@ -312,7 +310,7 @@ presented at the Meeting C++ 2022[@meetingcpp22] technical conference. C++20 - [Rule of Cheese](https://github.com/jpenuchot/rule-of-cheese): A collection of - compile-time microbenchmarks to help set better C++ metaprogramming guidelines + compile-time microbenchmarks to help set better C++ meta-programming guidelines to improve compile-time performance # Acknowledgements