From c7f02cbab2f68bc4296b2b4b8e4550de714291f0 Mon Sep 17 00:00:00 2001 From: pravirkr Date: Tue, 7 May 2024 01:49:55 +0300 Subject: [PATCH] impose c++20 --- .clang-format | 138 ++---------------------------------- .clang-tidy | 50 ++++++------- .gitignore | 2 +- CMakeLists.txt | 2 +- bench/fdmt_b.cpp | 97 ++++++++++++------------- include/dmt/fdmt_base.hpp | 9 +-- include/dmt/fdmt_cpu.hpp | 14 ++-- include/dmt/fdmt_gpu.hpp | 69 +++++++----------- lib/dmt/fdmt_utils.hpp | 18 ++--- lib/fdmt_cpu.cpp | 75 +++++++++++--------- lib/fdmt_utils.cpp | 32 ++++----- src/dmt_python.cpp | 47 ++++++------ tests/cpp/fdmt_cpu_t.cpp | 7 +- tests/cpp/fdmt_utils_t.cpp | 65 ++++++++--------- tests/python/test_libdmt.py | 6 +- 15 files changed, 247 insertions(+), 384 deletions(-) diff --git a/.clang-format b/.clang-format index 6d6ceb7..720934b 100644 --- a/.clang-format +++ b/.clang-format @@ -1,137 +1,11 @@ --- -Language: Cpp -# BasedOnStyle: LLVM +BasedOnStyle: LLVM +IndentWidth: 4 +--- +Language: Cpp AccessModifierOffset: -4 -AlignAfterOpenBracket: true -AlignConsecutiveMacros: false -AlignConsecutiveAssignments: true -AlignConsecutiveDeclarations: false -AlignEscapedNewlines: Left -AlignOperands: true -AlignTrailingComments: true -AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: Never -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: All -AllowShortLambdasOnASingleLine: All -AllowShortIfStatementsOnASingleLine: Never -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: Yes -BinPackArguments: true -BinPackParameters: true -BraceWrapping: - AfterCaseLabel: false - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - AfterExternBlock: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false - SplitEmptyFunction: true - SplitEmptyRecord: true - SplitEmptyNamespace: true -BreakBeforeBinaryOperators: All -BreakBeforeBraces: Attach -BreakBeforeInheritanceComma: false -BreakInheritanceList: BeforeColon -BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakConstructorInitializers: BeforeColon -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: true -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: false -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DeriveLineEnding: true +AlignConsecutiveAssignments: + Enabled: true DerivePointerAlignment: false -DisableFormat: false -ExperimentalAutoDetectBinPacking: false -FixNamespaceComments: true -ForEachMacros: - - foreach - - Q_FOREACH - - BOOST_FOREACH -IncludeBlocks: Preserve -IncludeCategories: - - Regex: '^"(llvm|llvm-c|clang|clang-c)/' - Priority: 2 - SortPriority: 0 - - Regex: '^(<|"(gtest|gmock|isl|json)/)' - Priority: 3 - SortPriority: 0 - - Regex: '.*' - Priority: 1 - SortPriority: 0 -IncludeIsMainRegex: '(Test)?$' -IncludeIsMainSourceRegex: '' -IndentCaseLabels: true -IndentGotoLabels: true -IndentPPDirectives: None -IndentWidth: 4 -IndentWrappedFunctionNames: false -JavaScriptQuotes: Leave -JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: true -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: Inner -ObjCBinPackProtocolList: Auto -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: true PackConstructorInitializers: CurrentLine -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 19 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyBreakTemplateDeclaration: 10 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left -ReflowComments: true -SortIncludes: false -SortUsingDeclarations: false -SpaceAfterCStyleCast: false -SpaceAfterLogicalNot: false -SpaceAfterTemplateKeyword: true -SpaceBeforeAssignmentOperators: true -SpaceBeforeCpp11BracedList: false -SpaceBeforeCtorInitializerColon: true -SpaceBeforeInheritanceColon: true -SpaceBeforeParens: ControlStatements -SpaceBeforeRangeBasedForLoopColon: true -SpaceInEmptyBlock: false -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 2 -SpacesInAngles: false -SpacesInConditionalStatement: false -SpacesInContainerLiterals: false -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -SpaceBeforeSquareBrackets: false -Standard: Latest -StatementMacros: - - Q_UNUSED - - QT_REQUIRE_VERSION -TabWidth: 4 -UseCRLF: false -UseTab: Never -... diff --git a/.clang-tidy b/.clang-tidy index 1916fe3..2cdcb4a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,29 +1,29 @@ --- -Checks: " - bugprone-*, - -bugprone-easily-swappable-parameters, - clang-diagnostic-*, - clang-analyzer-*, - cppcoreguidelines*, - -cppcoreguidelines-avoid-magic-numbers, - -cppcoreguidelines-init-variables, - -cppcoreguidelines-pro-bounds-pointer-arithmetic, - -cppcoreguidelines-pro-bounds-constant-array-index, - google-*, - llvm-include-order, - misc-*, - -misc-non-private-member-variables-in-classes, - modernize-*, - -modernize-use-trailing-return-type, - -modernize-use-nodiscard, - openmp-*, - performance-*, - portability-*, - readability-*, - -readability-identifier-length, - -readability-isolate-declaration, - -readability-magic-numbers, -" +Checks: > + bugprone-*, + -bugprone-easily-swappable-parameters, + clang-analyzer-*, + clang-diagnostic-*, + concurrency-*, + cppcoreguidelines*, + -cppcoreguidelines-avoid-magic-numbers, + -cppcoreguidelines-init-variables, + -cppcoreguidelines-pro-bounds-pointer-arithmetic, + google-*, + llvm-include-order, + misc-*, + -misc-non-private-member-variables-in-classes, + modernize-*, + -modernize-use-trailing-return-type, + -modernize-use-nodiscard, + openmp-*, + performance-*, + portability-*, + readability-*, + -readability-identifier-length, + -readability-isolate-declaration, + -readability-magic-numbers, + WarningsAsErrors: '' HeaderFilterRegex: ".*" FormatStyle: 'file' diff --git a/.gitignore b/.gitignore index 3302103..aff30d1 100644 --- a/.gitignore +++ b/.gitignore @@ -273,7 +273,7 @@ pyrightconfig.json ### VisualStudioCode ### .vscode/* -!.vscode/settings.json +.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b88018..4a42798 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ option(BUILD_BENCHMARKS "Build benchmarks" OFF) option(CODE_COVERAGE "Enable coverage reporting" OFF) option(ENABLE_FAST_MATH "Enable fast math flags" ON) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake) diff --git a/bench/fdmt_b.cpp b/bench/fdmt_b.cpp index 9a822b5..238f639 100644 --- a/bench/fdmt_b.cpp +++ b/bench/fdmt_b.cpp @@ -1,13 +1,14 @@ #include #include +#include #include +#include #include -#include class FDMTFixture : public benchmark::Fixture { public: - void SetUp(const ::benchmark::State& state) { + void SetUp(const ::benchmark::State& state) override { f_min = 704.0F; f_max = 1216.0F; nchans = 4096; @@ -16,7 +17,7 @@ class FDMTFixture : public benchmark::Fixture { nsamps = state.range(0); } - void TearDown(const ::benchmark::State&) {} + void TearDown(const ::benchmark::State& /*unused*/) override {} template std::vector generate_vector(size_t size, std::mt19937& gen) { @@ -26,12 +27,12 @@ class FDMTFixture : public benchmark::Fixture { return vec; } - float f_min; - float f_max; - size_t nchans; - float tsamp; - size_t dt_max; - size_t nsamps; + float f_min{}; + float f_max{}; + size_t nchans{}; + float tsamp{}; + size_t dt_max{}; + size_t nsamps{}; }; BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_plan_seq_cpu)(benchmark::State& state) { @@ -40,121 +41,121 @@ BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_plan_seq_cpu)(benchmark::State& state) { } } -BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_initialise_seq_cpu)(benchmark::State& state) { +BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_initialise_seq_cpu) +(benchmark::State& state) { + FDMTCPU::set_num_threads(1); FDMTCPU fdmt(f_min, f_max, nchans, nsamps, tsamp, dt_max); - fdmt.set_num_threads(1); std::random_device rd; std::mt19937 gen(rd()); - auto waterfall = generate_vector(nchans * nsamps, gen); - const auto& plan = fdmt.get_plan(); + auto waterfall = generate_vector(nchans * nsamps, gen); + const auto& plan = fdmt.get_plan(); const auto state_size = plan.state_shape[0][3] * plan.state_shape[0][4]; std::vector state_init(state_size, 0.0F); for (auto _ : state) { - fdmt.initialise(waterfall.data(), state_init.data()); + fdmt.initialise(std::span(waterfall), std::span(state_init)); } } -BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_initialise_par_cpu)(benchmark::State& state) { +BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_initialise_par_cpu) +(benchmark::State& state) { + FDMTCPU::set_num_threads(8); FDMTCPU fdmt(f_min, f_max, nchans, nsamps, tsamp, dt_max); - fdmt.set_num_threads(8); std::random_device rd; std::mt19937 gen(rd()); - auto waterfall = generate_vector(nchans * nsamps, gen); - const auto& plan = fdmt.get_plan(); + auto waterfall = generate_vector(nchans * nsamps, gen); + const auto& plan = fdmt.get_plan(); const auto state_size = plan.state_shape[0][3] * plan.state_shape[0][4]; std::vector state_init(state_size, 0.0F); for (auto _ : state) { - fdmt.initialise(waterfall.data(), state_init.data()); + fdmt.initialise(std::span(waterfall), std::span(state_init)); } } -BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_execute_seq_cpu)(benchmark::State& state) { +BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_execute_seq_cpu) +(benchmark::State& state) { + FDMTCPU::set_num_threads(1); FDMTCPU fdmt(f_min, f_max, nchans, nsamps, tsamp, dt_max); - fdmt.set_num_threads(1); std::random_device rd; std::mt19937 gen(rd()); auto waterfall = generate_vector(nchans * nsamps, gen); std::vector dmt(fdmt.get_dt_grid_final().size() * nsamps, 0.0F); for (auto _ : state) { - fdmt.execute(waterfall.data(), waterfall.size(), dmt.data(), - dmt.size()); + fdmt.execute(std::span(waterfall), std::span(dmt)); } } -BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_execute_par_cpu)(benchmark::State& state) { +BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_execute_par_cpu) +(benchmark::State& state) { + FDMTCPU::set_num_threads(8); FDMTCPU fdmt(f_min, f_max, nchans, nsamps, tsamp, dt_max); - fdmt.set_num_threads(8); std::random_device rd; std::mt19937 gen(rd()); auto waterfall = generate_vector(nchans * nsamps, gen); std::vector dmt(fdmt.get_dt_grid_final().size() * nsamps, 0.0F); for (auto _ : state) { - fdmt.execute(waterfall.data(), waterfall.size(), dmt.data(), - dmt.size()); + fdmt.execute(std::span(waterfall), std::span(dmt)); } } -BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_overall_seq_cpu)(benchmark::State& state) { +BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_overall_seq_cpu) +(benchmark::State& state) { std::random_device rd; std::mt19937 gen(rd()); auto waterfall = generate_vector(nchans * nsamps, gen); - + for (auto _ : state) { + FDMTCPU::set_num_threads(1); FDMTCPU fdmt(f_min, f_max, nchans, nsamps, tsamp, dt_max); state.PauseTiming(); - fdmt.set_num_threads(1); std::vector dmt(fdmt.get_dt_grid_final().size() * nsamps, 0.0F); state.ResumeTiming(); - - fdmt.execute(waterfall.data(), waterfall.size(), dmt.data(), - dmt.size()); + fdmt.execute(std::span(waterfall), std::span(dmt)); } } -BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_overall_par_cpu)(benchmark::State& state) { +BENCHMARK_DEFINE_F(FDMTFixture, BM_fdmt_overall_par_cpu) +(benchmark::State& state) { std::random_device rd; std::mt19937 gen(rd()); auto waterfall = generate_vector(nchans * nsamps, gen); - + for (auto _ : state) { + FDMTCPU::set_num_threads(8); FDMTCPU fdmt(f_min, f_max, nchans, nsamps, tsamp, dt_max); state.PauseTiming(); - fdmt.set_num_threads(8); std::vector dmt(fdmt.get_dt_grid_final().size() * nsamps, 0.0F); state.ResumeTiming(); - - fdmt.execute(waterfall.data(), waterfall.size(), dmt.data(), - dmt.size()); + fdmt.execute(std::span(waterfall), std::span(dmt)); } } -constexpr size_t min_nsamps = 1 << 11; -constexpr size_t max_nsamps = 1 << 16; +constexpr size_t kMinNsamps = 1 << 11; +constexpr size_t kMaxNsamps = 1 << 16; BENCHMARK_REGISTER_F(FDMTFixture, BM_fdmt_plan_seq_cpu) ->RangeMultiplier(2) - ->Range(min_nsamps, max_nsamps); + ->Range(kMinNsamps, kMaxNsamps); BENCHMARK_REGISTER_F(FDMTFixture, BM_fdmt_initialise_seq_cpu) ->RangeMultiplier(2) - ->Range(min_nsamps, max_nsamps); + ->Range(kMinNsamps, kMaxNsamps); BENCHMARK_REGISTER_F(FDMTFixture, BM_fdmt_initialise_par_cpu) ->RangeMultiplier(2) - ->Range(min_nsamps, max_nsamps); + ->Range(kMinNsamps, kMaxNsamps); BENCHMARK_REGISTER_F(FDMTFixture, BM_fdmt_execute_seq_cpu) ->RangeMultiplier(2) - ->Range(min_nsamps, max_nsamps); + ->Range(kMinNsamps, kMaxNsamps); BENCHMARK_REGISTER_F(FDMTFixture, BM_fdmt_execute_par_cpu) ->RangeMultiplier(2) - ->Range(min_nsamps, max_nsamps); + ->Range(kMinNsamps, kMaxNsamps); BENCHMARK_REGISTER_F(FDMTFixture, BM_fdmt_overall_seq_cpu) ->RangeMultiplier(2) - ->Range(min_nsamps, max_nsamps); + ->Range(kMinNsamps, kMaxNsamps); BENCHMARK_REGISTER_F(FDMTFixture, BM_fdmt_overall_par_cpu) ->RangeMultiplier(2) - ->Range(min_nsamps, max_nsamps); + ->Range(kMinNsamps, kMaxNsamps); BENCHMARK_MAIN(); diff --git a/include/dmt/fdmt_base.hpp b/include/dmt/fdmt_base.hpp index 38dd227..af3c421 100644 --- a/include/dmt/fdmt_base.hpp +++ b/include/dmt/fdmt_base.hpp @@ -2,6 +2,7 @@ #include #include +#include #include using SizeType = size_t; @@ -51,10 +52,10 @@ class FDMT { const DtGridType& get_dt_grid_final() const; std::vector get_dm_grid_final() const; static void set_log_level(int level); - virtual void execute(const float* waterfall, size_t waterfall_size, - float* dmt, size_t dmt_size) - = 0; - virtual void initialise(const float* waterfall, float* state) = 0; + virtual void execute(std::span waterfall, + std::span dmt) = 0; + virtual void initialise(std::span waterfall, + std::span state) = 0; protected: void check_inputs(size_t waterfall_size, size_t dmt_size) const; diff --git a/include/dmt/fdmt_cpu.hpp b/include/dmt/fdmt_cpu.hpp index 8a7cc6d..6d73363 100644 --- a/include/dmt/fdmt_cpu.hpp +++ b/include/dmt/fdmt_cpu.hpp @@ -5,18 +5,18 @@ class FDMTCPU : public FDMT { public: FDMTCPU(float f_min, float f_max, size_t nchans, size_t nsamps, float tsamp, - size_t dt_max, size_t dt_step = 1, size_t dt_min = 0); + size_t dt_max, size_t dt_step = 1, size_t dt_min = 0); static void set_num_threads(int nthreads); - void execute(const float* waterfall, size_t waterfall_size, float* dmt, - size_t dmt_size) override; - void initialise(const float* waterfall, float* state) override; + void execute(std::span waterfall, + std::span dmt) override; + void initialise(std::span waterfall, + std::span state) override; private: // Buffers std::vector m_state_in; std::vector m_state_out; - void execute_iter(const float* state_in, float* state_out, - SizeType i_iter); + void execute_iter(std::span state_in, + std::span state_out, SizeType i_iter); }; - diff --git a/include/dmt/fdmt_gpu.hpp b/include/dmt/fdmt_gpu.hpp index c73aa65..9d208cb 100644 --- a/include/dmt/fdmt_gpu.hpp +++ b/include/dmt/fdmt_gpu.hpp @@ -4,21 +4,11 @@ #include -using DtGridTypeD = thrust::device_vector; -using DtPlanTypeD = thrust::tuple; -using StShapeTypeD = thrust::tuple; - -struct SubbandPlanD { - DtGridTypeD dt_grid_d; - thrust::device_vector dt_plan_d; -}; - struct FDMTPlanD { - thrust::device_vector df_top_d; - thrust::device_vector df_bot_d; - thrust::device_vector dt_grid_sub_top_d; - thrust::device_vector state_shape_d; - thrust::device_vector> sub_plan_d; + thrust::device_vector state_shape_d; + thrust::device_vector state_idx_d; + thrust::device_vector dt_grid_d; + thrust::device_vector dt_plan_d; }; class FDMTGPU : public FDMT { @@ -33,35 +23,30 @@ class FDMTGPU : public FDMT { thrust::device_vector m_state_in_d; thrust::device_vector m_state_out_d; - FDMTPlanD fdmt_plan_d_; - - FDMTPlan_d transferPlanToDevice(const FDMTPlan& plan) { - FDMTPlan_d plan_d; - - plan_d.df_top = plan.df_top; - plan_d.df_bot = plan.df_bot; - plan_d.dt_grid_sub_top = plan.dt_grid_sub_top; - plan_d.state_shape = plan.state_shape; - - for (const auto& subPlanVector : plan.sub_plan) { - thrust::device_vector subPlanVector_d; - - for (const auto& subPlan : subPlanVector) { - SubbandPlanD subPlan_d; + FDMTPlanD m_fdmt_plan_d; - subPlan_d.f_start = subPlan.f_start; - subPlan_d.f_end = subPlan.f_end; - subPlan_d.f_mid1 = subPlan.f_mid1; - subPlan_d.f_mid2 = subPlan.f_mid2; - subPlan_d.dt_grid = subPlan.dt_grid; - subPlan_d.dt_plan = subPlan.dt_plan; - - subPlanVector_d.push_back(subPlan_d); + FDMTPlanD transfer_plan_to_device() { + const auto& plan = get_plan(); + FDMTPlanD plan_d; + for (const auto& state_shape_iter : plan.state_shape) { + for (const auto& shape : state_shape_iter) { + plan_d.state_shape_d.push_back(shape); + } + } + // flatten sub_plan and transfer to device + for (const auto& sub_plan_iter : plan.sub_plan) { + for (const auto& sub_plan : sub_plan_iter) { + plan_d.state_idx_d.push_back(sub_plan.state_idx); + for (const auto& dt : sub_plan.dt_grid) { + plan_d.dt_grid_d.push_back(dt); + } + for (const auto& dt_tuple : sub_plan.dt_plan) { + for (const auto& idt : dt_tuple) { + plan_d.dt_plan_d.push_back(idt); + } + } } - - plan_d.sub_plan.push_back(subPlanVector_d); } - return plan_d; - } -}; + }; +}; \ No newline at end of file diff --git a/lib/dmt/fdmt_utils.hpp b/lib/dmt/fdmt_utils.hpp index 5341e23..37a0b6c 100644 --- a/lib/dmt/fdmt_utils.hpp +++ b/lib/dmt/fdmt_utils.hpp @@ -1,10 +1,14 @@ #pragma once #include +#include #include -constexpr float kDispCoeff = -2.0; -constexpr float kDispConst = 4.148808e3; +constexpr float kDispCoeff = -2.0; +constexpr float kDispConstLK = 4.1488080e3; // L&K Handbook of Pulsar Astronomy +constexpr float kDispConstMT = 4.1493774e3; // TEMPO2, Manchester&Taylor (1972) +constexpr float kDispConstSI = 4.1488066e3; // SI value, Kulkarni (2020) +constexpr float kDispConst = kDispConstMT; namespace fdmt { @@ -13,13 +17,11 @@ float cff(float f1_start, float f1_end, float f2_start, float f2_end); size_t calculate_dt_sub(float f_start, float f_end, float f_min, float f_max, size_t dt); -void add_offset_kernel(const float* arr1, size_t size_in1, const float* arr2, - size_t size_in2, float* arr_out, size_t size_out, - size_t offset); +void add_offset_kernel(std::span arr1, std::span arr2, + std::span arr_out, size_t offset); -void copy_kernel(const float* arr1, size_t size_in, float* arr_out, - size_t size_out); +void copy_kernel(std::span arr1, std::span arr_out); size_t find_closest_index(const std::vector& arr_sorted, size_t val); -} // namespace fdmt +} // namespace fdmt diff --git a/lib/fdmt_cpu.cpp b/lib/fdmt_cpu.cpp index d708de7..146a808 100644 --- a/lib/fdmt_cpu.cpp +++ b/lib/fdmt_cpu.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #ifdef USE_OPENMP #include @@ -24,29 +25,30 @@ void FDMTCPU::set_num_threads(int nthreads) { #endif } -void FDMTCPU::execute(const float* waterfall, size_t waterfall_size, float* dmt, - size_t dmt_size) { - check_inputs(waterfall_size, dmt_size); - float* state_in_ptr = m_state_in.data(); - float* state_out_ptr = m_state_out.data(); +void FDMTCPU::execute(std::span waterfall, std::span dmt) { + check_inputs(waterfall.size(), dmt.size()); + std::span state_in_span(m_state_in); + std::span state_out_span(m_state_out); - initialise(waterfall, state_in_ptr); + initialise(waterfall, state_in_span); const auto niters = get_niters(); for (size_t i_iter = 1; i_iter < niters + 1; ++i_iter) { - execute_iter(state_in_ptr, state_out_ptr, i_iter); + execute_iter(state_in_span, state_out_span, i_iter); if (i_iter < niters) { - std::swap(state_in_ptr, state_out_ptr); + std::swap(state_in_span, state_out_span); } } - std::copy_n(state_out_ptr, dmt_size, dmt); + std::copy_n(state_out_span.data(), dmt.size(), dmt.data()); } -void FDMTCPU::initialise(const float* waterfall, float* state) { +void FDMTCPU::initialise(std::span waterfall, + std::span state) { const auto& plan = get_plan(); const auto& sub_plan_init = plan.sub_plan[0]; const auto& nsamps = plan.state_shape[0][4]; #ifdef USE_OPENMP -#pragma omp parallel for +#pragma omp parallel for default(none) \ + shared(sub_plan_init, state, waterfall, nsamps) #endif for (size_t i_sub = 0; i_sub < sub_plan_init.size(); ++i_sub) { const auto& dt_grid_sub = sub_plan_init[i_sub].dt_grid; @@ -58,8 +60,8 @@ void FDMTCPU::initialise(const float* waterfall, float* state) { for (size_t i = isamp - dt_grid_sub_min; i <= isamp; ++i) { sum += waterfall[i_sub * nsamps + i]; } - state[state_sub_idx + isamp] - = sum / static_cast(dt_grid_sub_min + 1); + state[state_sub_idx + isamp] = + sum / static_cast(dt_grid_sub_min + 1); } // Initialise state for [:, dt_grid_init[i_dt], dt_grid_init[i_dt]:] for (size_t i_dt = 1; i_dt < dt_grid_sub.size(); ++i_dt) { @@ -70,23 +72,23 @@ void FDMTCPU::initialise(const float* waterfall, float* state) { for (size_t i = isamp - dt_cur; i < isamp - dt_prev; ++i) { sum += waterfall[i_sub * nsamps + i]; } - state[state_sub_idx + i_dt * nsamps + isamp] - = (state[state_sub_idx + (i_dt - 1) * nsamps + isamp] - * (static_cast(dt_prev) + 1.0F) - + sum) - / (static_cast(dt_cur) + 1.0F); + state[state_sub_idx + i_dt * nsamps + isamp] = + (state[state_sub_idx + (i_dt - 1) * nsamps + isamp] * + (static_cast(dt_prev) + 1.0F) + + sum) / + (static_cast(dt_cur) + 1.0F); } } } - const auto& [nchans_l, ndt_min, ndt_max, nchans_ndt, nsamps_l] - = plan.state_shape[0]; - spdlog::debug("FDMT: Iteration {}, dimensions: {} ({}x[{}..{}]) x {}", - 0, nchans_ndt, nchans_l, ndt_min, ndt_max, nsamps_l); + const auto& [nchans_l, ndt_min, ndt_max, nchans_ndt, nsamps_l] = + plan.state_shape[0]; + spdlog::debug("FDMT: Iteration {}, dimensions: {} ({}x[{}..{}]) x {}", 0, + nchans_ndt, nchans_l, ndt_min, ndt_max, nsamps_l); } -void FDMTCPU::execute_iter(const float* state_in, float* state_out, - size_t i_iter) { +void FDMTCPU::execute_iter(std::span state_in, + std::span state_out, size_t i_iter) { const auto& plan = get_plan(); const auto& sub_plan_cur = plan.sub_plan[i_iter]; const auto& sub_plan_prev = plan.sub_plan[i_iter - 1]; @@ -98,28 +100,31 @@ void FDMTCPU::execute_iter(const float* state_in, float* state_out, const auto& state_sub_idx_head = sub_plan_prev[2 * i_sub + 1].state_idx; #ifdef USE_OPENMP -#pragma omp parallel for +#pragma omp parallel for default(none) \ + shared(dt_plan_sub, state_in, state_out, nsamps, state_sub_idx, \ + state_sub_idx_tail, state_sub_idx_head) #endif for (const auto& dt_plan : dt_plan_sub) { const auto& i_dt_out = dt_plan[0]; const auto& offset = dt_plan[1]; const auto& i_dt_tail = dt_plan[2]; const auto& i_dt_head = dt_plan[3]; - const float* tail - = &state_in[state_sub_idx_tail + i_dt_tail * nsamps]; - float* out = &state_out[state_sub_idx + i_dt_out * nsamps]; + + std::span tail = state_in.subspan( + state_sub_idx_tail + i_dt_tail * nsamps, nsamps); + std::span out = + state_out.subspan(state_sub_idx + i_dt_out * nsamps, nsamps); if (i_dt_head == SIZE_MAX) { - fdmt::copy_kernel(tail, nsamps, out, nsamps); + fdmt::copy_kernel(tail, out); } else { - const float* head - = &state_in[state_sub_idx_head + i_dt_head * nsamps]; - fdmt::add_offset_kernel(tail, nsamps, head, nsamps, out, nsamps, - offset); + std::span head = state_in.subspan( + state_sub_idx_head + i_dt_head * nsamps, nsamps); + fdmt::add_offset_kernel(tail, head, out, offset); } } } - const auto& [nchans_l, ndt_min, ndt_max, nchans_ndt, nsamps_l] - = plan.state_shape[i_iter]; + const auto& [nchans_l, ndt_min, ndt_max, nchans_ndt, nsamps_l] = + plan.state_shape[i_iter]; spdlog::debug("FDMT: Iteration {}, dimensions: {} ({}x[{}..{}]) x {}", i_iter, nchans_ndt, nchans_l, ndt_min, ndt_max, nsamps_l); } diff --git a/lib/fdmt_utils.cpp b/lib/fdmt_utils.cpp index a2e1799..e1fe0e7 100644 --- a/lib/fdmt_utils.cpp +++ b/lib/fdmt_utils.cpp @@ -5,8 +5,8 @@ #include "dmt/fdmt_utils.hpp" float fdmt::cff(float f1_start, float f1_end, float f2_start, float f2_end) { - return (std::pow(f1_start, kDispCoeff) - std::pow(f1_end, kDispCoeff)) - / (std::pow(f2_start, kDispCoeff) - std::pow(f2_end, kDispCoeff)); + return (std::pow(f1_start, kDispCoeff) - std::pow(f1_end, kDispCoeff)) / + (std::pow(f2_start, kDispCoeff) - std::pow(f2_end, kDispCoeff)); } size_t fdmt::calculate_dt_sub(float f_start, float f_end, float f_min, @@ -15,23 +15,22 @@ size_t fdmt::calculate_dt_sub(float f_start, float f_end, float f_min, return static_cast(std::round(static_cast(dt) * ratio)); } - -void fdmt::add_offset_kernel(const float* arr1, size_t size_in1, - const float* arr2, size_t size_in2, float* arr_out, - size_t size_out, size_t offset) { - if (size_in1 != size_in2) { +void fdmt::add_offset_kernel(std::span arr1, + std::span arr2, + std::span arr_out, size_t offset) { + if (arr1.size() != arr2.size()) { throw std::runtime_error("Input sizes are not equal"); } - if (size_out < size_in1) { + if (arr_out.size() < arr1.size()) { throw std::runtime_error("Output size is less than input size"); } - if (offset >= size_in1) { + if (offset >= arr1.size()) { throw std::runtime_error("Offset is greater than input size"); } - size_t nsum = size_in1 - offset; + size_t nsum = arr1.size() - offset; size_t t_ind = 0; - std::copy_n(arr1, offset, arr_out); + std::copy_n(arr1.data(), offset, arr_out.data()); t_ind += offset; for (size_t i = 0; i < nsum; ++i) { @@ -39,19 +38,18 @@ void fdmt::add_offset_kernel(const float* arr1, size_t size_in1, } t_ind += nsum; - size_t nrest = std::min(offset, size_out - t_ind); + size_t nrest = std::min(offset, arr_out.size() - t_ind); if (nrest > 0) { - std::copy_n(arr2 + nsum, nrest, arr_out + t_ind); + std::copy_n(arr2.data() + nsum, nrest, arr_out.data() + t_ind); t_ind += nrest; } } -void fdmt::copy_kernel(const float* arr1, size_t size_in, float* arr_out, - size_t size_out) { - if (size_out < size_in) { +void fdmt::copy_kernel(std::span arr1, std::span arr_out) { + if (arr_out.size() < arr1.size()) { throw std::runtime_error("Output size is less than input size"); } - std::copy(arr1, arr1 + size_in, arr_out); + std::copy_n(arr1.data(), arr1.size(), arr_out.data()); } size_t fdmt::find_closest_index(const std::vector& arr_sorted, diff --git a/src/dmt_python.cpp b/src/dmt_python.cpp index a244b0e..a62624c 100644 --- a/src/dmt_python.cpp +++ b/src/dmt_python.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include @@ -12,10 +13,10 @@ template inline py::array_t as_pyarray(Sequence&& seq) { auto size = seq.size(); auto data = seq.data(); - std::unique_ptr seq_ptr - = std::make_unique(std::forward(seq)); + std::unique_ptr seq_ptr = + std::make_unique(std::forward(seq)); auto capsule = py::capsule(seq_ptr.get(), [](void* p) { - std::unique_ptr(reinterpret_cast(p)); // NOLINT + std::unique_ptr(reinterpret_cast(p)); // NOLINT }); seq_ptr.release(); return py::array(size, data, capsule); @@ -86,26 +87,30 @@ PYBIND11_MODULE(libdmt, mod) { "execute", [](FDMTCPU& fdmt, const py::array_t& waterfall) { const auto* shape = waterfall.shape(); - const auto dt_final_size - = static_cast(fdmt.get_dt_grid_final().size()); + const auto dt_final_size = + static_cast(fdmt.get_dt_grid_final().size()); py::array_t dmt( {dt_final_size, shape[1]}); - fdmt.execute(waterfall.data(), waterfall.size(), dmt.mutable_data(), - dmt.size()); + fdmt.execute( + std::span(waterfall.data(), waterfall.size()), + std::span(dmt.mutable_data(), dmt.size())); return dmt; }); - cls_fdmt.def("initialise", - [](FDMTCPU& fdmt, - const py::array_t& waterfall) { - const auto* shape = waterfall.shape(); - const auto& plan = fdmt.get_plan(); - const auto nchans_ndt - = static_cast(plan.state_shape[0][3]); - py::array_t state( - {nchans_ndt, shape[1]}); - std::fill(state.mutable_data(), - state.mutable_data() + state.size(), 0.0F); - fdmt.initialise(waterfall.data(), state.mutable_data()); - return state; - }); + cls_fdmt.def( + "initialise", + [](FDMTCPU& fdmt, + const py::array_t& waterfall) { + const auto* shape = waterfall.shape(); + const auto& plan = fdmt.get_plan(); + const auto nchans_ndt = + static_cast(plan.state_shape[0][3]); + py::array_t state( + {nchans_ndt, shape[1]}); + std::fill(state.mutable_data(), state.mutable_data() + state.size(), + 0.0F); + fdmt.initialise( + std::span(waterfall.data(), waterfall.size()), + std::span(state.mutable_data(), state.size())); + return state; + }); } diff --git a/tests/cpp/fdmt_cpu_t.cpp b/tests/cpp/fdmt_cpu_t.cpp index 8b51314..b0dfb9a 100644 --- a/tests/cpp/fdmt_cpu_t.cpp +++ b/tests/cpp/fdmt_cpu_t.cpp @@ -2,6 +2,7 @@ #include #include +#include TEST_CASE("FDMT class tests", "[fdmt]") { SECTION("Test case 1: Constructor and getter methods") { @@ -23,7 +24,8 @@ TEST_CASE("FDMT class tests", "[fdmt]") { const auto& plan = fdmt.get_plan(); const auto state_size = plan.state_shape[0][3] * plan.state_shape[0][4]; std::vector state(state_size, 0.0F); - REQUIRE_NOTHROW(fdmt.initialise(waterfall.data(), state.data())); + REQUIRE_NOTHROW( + fdmt.initialise(std::span(waterfall), std::span(state))); } SECTION("Test case 3: execute method") { @@ -31,7 +33,6 @@ TEST_CASE("FDMT class tests", "[fdmt]") { std::vector waterfall(static_cast(500 * 1024), 1.0F); const size_t dt_final_size = fdmt.get_dt_grid_final().size(); std::vector dmt(dt_final_size * 1024, 0.0F); - REQUIRE_NOTHROW(fdmt.execute(waterfall.data(), waterfall.size(), - dmt.data(), dmt.size())); + REQUIRE_NOTHROW(fdmt.execute(std::span(waterfall), std::span(dmt))); } } diff --git a/tests/cpp/fdmt_utils_t.cpp b/tests/cpp/fdmt_utils_t.cpp index 7ea5985..9918a47 100644 --- a/tests/cpp/fdmt_utils_t.cpp +++ b/tests/cpp/fdmt_utils_t.cpp @@ -2,6 +2,7 @@ #include #include +#include TEST_CASE("cff", "[fdmt_utils]") { REQUIRE(fdmt::cff(1000.0F, 1500.0F, 1000.0F, 1500.0F) == 1.0F); @@ -10,8 +11,8 @@ TEST_CASE("cff", "[fdmt_utils]") { } TEST_CASE("calculate_dt_sub", "[fdmt_utils]") { - REQUIRE(fdmt::calculate_dt_sub(1000.0F, 1500.0F, 1000.0F, 1500.0F, 100) - == 100); + REQUIRE(fdmt::calculate_dt_sub(1000.0F, 1500.0F, 1000.0F, 1500.0F, 100) == + 100); REQUIRE(fdmt::calculate_dt_sub(1000.0F, 1500.0F, 1000.0F, 1500.0F, 0) == 0); } @@ -22,10 +23,9 @@ TEST_CASE("add_offset_kernel", "[fdmt_utils]") { std::vector arr_out(8, 0.0F); size_t offset = 2; REQUIRE_NOTHROW(fdmt::add_offset_kernel( - arr1.data(), arr1.size(), arr2.data(), arr2.size(), arr_out.data(), - arr_out.size(), offset)); - std::vector expected_output - = {1.0F, 2.0F, 9.0F, 11.0F, 13.0F, 9.0F, 10.0F, 0.0F}; + std::span(arr1), std::span(arr2), std::span(arr_out), offset)); + std::vector expected_output = {1.0F, 2.0F, 9.0F, 11.0F, + 13.0F, 9.0F, 10.0F, 0.0F}; REQUIRE(arr_out == expected_output); } SECTION("Test case 2: Output size less than input size") { @@ -33,10 +33,9 @@ TEST_CASE("add_offset_kernel", "[fdmt_utils]") { std::vector arr2 = {6.0F, 7.0F, 8.0F, 9.0F, 10.0F}; std::vector arr_out(4, 0.0F); size_t offset = 2; - REQUIRE_THROWS_AS(fdmt::add_offset_kernel(arr1.data(), arr1.size(), - arr2.data(), arr2.size(), - arr_out.data(), - arr_out.size(), offset), + REQUIRE_THROWS_AS(fdmt::add_offset_kernel(std::span(arr1), + std::span(arr2), + std::span(arr_out), offset), std::runtime_error); } @@ -45,10 +44,9 @@ TEST_CASE("add_offset_kernel", "[fdmt_utils]") { std::vector arr2 = {4.0F, 5.0F}; std::vector arr_out(5, 0.0F); size_t offset = 4; - REQUIRE_THROWS_AS(fdmt::add_offset_kernel(arr1.data(), arr1.size(), - arr2.data(), arr2.size(), - arr_out.data(), - arr_out.size(), offset), + REQUIRE_THROWS_AS(fdmt::add_offset_kernel(std::span(arr1), + std::span(arr2), + std::span(arr_out), offset), std::runtime_error); } SECTION("Test case 4: Empty input vectors") { @@ -56,10 +54,9 @@ TEST_CASE("add_offset_kernel", "[fdmt_utils]") { std::vector arr2; std::vector arr_out(3, 0.0F); size_t offset = 0; - REQUIRE_THROWS_AS(fdmt::add_offset_kernel(arr1.data(), arr1.size(), - arr2.data(), arr2.size(), - arr_out.data(), - arr_out.size(), offset), + REQUIRE_THROWS_AS(fdmt::add_offset_kernel(std::span(arr1), + std::span(arr2), + std::span(arr_out), offset), std::runtime_error); } SECTION("Test case 5: Varying offsets") { @@ -68,27 +65,23 @@ TEST_CASE("add_offset_kernel", "[fdmt_utils]") { std::vector arr_out(6, 0.0F); size_t offset1 = 0; REQUIRE_NOTHROW(fdmt::add_offset_kernel( - arr1.data(), arr1.size(), arr2.data(), arr2.size(), arr_out.data(), - arr_out.size(), offset1)); - std::vector expected_output - = {1.0F, 3.0F, 5.0F, 7.0F, 0.0F, 0.0F}; + std::span(arr1), std::span(arr2), std::span(arr_out), offset1)); + std::vector expected_output = {1.0F, 3.0F, 5.0F, + 7.0F, 0.0F, 0.0F}; REQUIRE(arr_out == expected_output); size_t offset2 = 1; REQUIRE_NOTHROW(fdmt::add_offset_kernel( - arr1.data(), arr1.size(), arr2.data(), arr2.size(), arr_out.data(), - arr_out.size(), offset2)); + std::span(arr1), std::span(arr2), std::span(arr_out), offset2)); expected_output = {0.0F, 2.0F, 4.0F, 6.0F, 4.0F, 0.0F}; REQUIRE(arr_out == expected_output); size_t offset3 = 2; REQUIRE_NOTHROW(fdmt::add_offset_kernel( - arr1.data(), arr1.size(), arr2.data(), arr2.size(), arr_out.data(), - arr_out.size(), offset3)); + std::span(arr1), std::span(arr2), std::span(arr_out), offset3)); expected_output = {0.0F, 1.0F, 3.0F, 5.0F, 3.0F, 4.0F}; REQUIRE(arr_out == expected_output); size_t offset4 = 3; REQUIRE_NOTHROW(fdmt::add_offset_kernel( - arr1.data(), arr1.size(), arr2.data(), arr2.size(), arr_out.data(), - arr_out.size(), offset4)); + std::span(arr1), std::span(arr2), std::span(arr_out), offset4)); expected_output = {0.0F, 1.0F, 2.0F, 4.0F, 2.0F, 3.0F}; REQUIRE(arr_out == expected_output); } @@ -99,8 +92,7 @@ TEST_CASE("copy_kernel", "[fdmt_utils]") { std::vector arr1 = {1.0F, 2.0F, 3.0F, 4.0F, 5.0F}; std::vector arr_out(10, 0.0F); ; - REQUIRE_NOTHROW(fdmt::copy_kernel(arr1.data(), arr1.size(), - arr_out.data(), arr_out.size())); + REQUIRE_NOTHROW(fdmt::copy_kernel(std::span(arr1), std::span(arr_out))); for (size_t i = 0; i < arr1.size(); ++i) { REQUIRE(arr_out[i] == arr1[i]); } @@ -111,17 +103,16 @@ TEST_CASE("copy_kernel", "[fdmt_utils]") { SECTION("Test case 2: Output size less than input size") { std::vector arr1 = {1.0F, 2.0F, 3.0F, 4.0F, 5.0F}; std::vector arr_out(3, 0.0F); - REQUIRE_THROWS_AS(fdmt::copy_kernel(arr1.data(), arr1.size(), - arr_out.data(), arr_out.size()), - std::runtime_error); + REQUIRE_THROWS_AS( + fdmt::copy_kernel(std::span(arr1), std::span(arr_out)), + std::runtime_error); } SECTION("Test case 4: Empty input vector") { std::vector arr1; std::vector arr_out(5, 0.0F); - REQUIRE_NOTHROW(fdmt::copy_kernel(arr1.data(), arr1.size(), - arr_out.data(), arr_out.size())); - for (size_t i = 0; i < arr_out.size(); ++i) { - REQUIRE(arr_out[i] == 0.0F); + REQUIRE_NOTHROW(fdmt::copy_kernel(std::span(arr1), std::span(arr_out))); + for (float i : arr_out) { + REQUIRE(i == 0.0F); } } } diff --git a/tests/python/test_libdmt.py b/tests/python/test_libdmt.py index c57202b..57d187d 100644 --- a/tests/python/test_libdmt.py +++ b/tests/python/test_libdmt.py @@ -9,10 +9,10 @@ def test_initialise_ones(self) -> None: dt_max = 512 thefdmt = libdmt.FDMT(1000, 1500, nchans, nsamples, 0.001, dt_max) waterfall = np.ones((nchans, nsamples), dtype=np.float32) - thefdmt_init = thefdmt.initialise(waterfall) + dmt_output = thefdmt.execute(waterfall) np.testing.assert_equal( - thefdmt_init.shape, - (nchans, thefdmt.dt_grid_init.size, nsamples), + dmt_output.shape, + (thefdmt.dt_grid_final.size, nsamples), ) """ np.testing.assert_equal(