From 30874f5d64332ff927792d540fe9c1895e2507d1 Mon Sep 17 00:00:00 2001 From: pravirkr Date: Fri, 10 May 2024 01:37:56 +0300 Subject: [PATCH] version bump for merging. benchmark updated --- CMakeLists.txt | 2 +- README.md | 4 ++- include/dmt/fdmt_base.hpp | 8 +++--- include/dmt/fdmt_gpu.hpp | 55 +++++++++++++++++++-------------------- lib/dmt/fdmt_utils.hpp | 4 +-- lib/fdmt_gpu.cu | 22 ++++++++++++++++ pyproject.toml | 2 +- 7 files changed, 60 insertions(+), 37 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b88018..32c1bdd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_policy(SET CMP0148 OLD) project( dmt - VERSION 0.1.0 + VERSION 0.2.0 LANGUAGES CXX ) diff --git a/README.md b/README.md index 3a5a9a4..981679a 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ [![GitHub CI](https://github.com/pravirkr/dmt/actions/workflows/ci.yml/badge.svg)](https://github.com/pravirkr/dmt/actions/workflows/ci.yml) [![codecov](https://codecov.io/gh/pravirkr/dmt/graph/badge.svg?token=17BGN5IIM9)](https://codecov.io/gh/pravirkr/dmt) +![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fpravirkr%2Fdmt%2Fmain%2Fpyproject.toml) +![C++ Version](https://img.shields.io/badge/C%2B%2B-17-blue) [![License](https://img.shields.io/github/license/pravirkr/dmt)](https://github.com/pravirkr/dmt/blob/main/LICENSE) ## Dispersion Measure Transforms @@ -34,6 +36,6 @@ dmt_transform = thefdmt.execute(frb.astype(np.float32)) f_min = 704.0, f_max = 1216.0, nchans = 4096, tsamp = 0.00008192, dt_max = 2048, nsamps = n; nthreads = 1, 8; ``` -![](bench/results/bench.png) +![](bench/results/bench_m1.png) diff --git a/include/dmt/fdmt_base.hpp b/include/dmt/fdmt_base.hpp index ec926fb..458d45c 100644 --- a/include/dmt/fdmt_base.hpp +++ b/include/dmt/fdmt_base.hpp @@ -5,10 +5,11 @@ #include #include -using SizeType = size_t; -using DtGridType = std::vector; +using SizeType = size_t; +using DtGridType = std::vector; +// state shape: nchans, ndt_min, ndt_max, ncoords, nsamps using StShapeType = std::array; -using FDMTCoordType = std::pair; // isub, i_dt +using FDMTCoordType = std::pair; // i_sub, i_dt struct FDMTCoordMapping { FDMTCoordType head; @@ -19,7 +20,6 @@ struct FDMTCoordMapping { struct FDMTPlan { std::vector df_top; std::vector df_bot; - // state shape: nchans, ndt_min, ndt_max, nchansxndt, nsamps std::vector state_shape; std::vector> coordinates; diff --git a/include/dmt/fdmt_gpu.hpp b/include/dmt/fdmt_gpu.hpp index 9d208cb..7a92c32 100644 --- a/include/dmt/fdmt_gpu.hpp +++ b/include/dmt/fdmt_gpu.hpp @@ -4,11 +4,32 @@ #include +using StShapeTypeD = int4; +using FDMTCoordTypeD = int2; + +struct FDMTCoordMappingD { + FDMTCoordTypeD head; + FDMTCoordTypeD tail; + SizeType offset; +}; + struct FDMTPlanD { - thrust::device_vector state_shape_d; - thrust::device_vector state_idx_d; - thrust::device_vector dt_grid_d; - thrust::device_vector dt_plan_d; + thrust::device_vector nsubs_d; + thrust::device_vector ncoords_d; + thrust::device_vector ncoords_to_copy_d; + thrust::device_vector nsubs_cumul_d; + thrust::device_vector ncoords_cumul_d; + thrust::device_vector ncoords_to_copy_cumul_d; + // i = i_iter + thrust::device_vector state_shape_d; + // i = i_iter * ncoords_cumul_iter + i_coord + thrust::device_vector coordinates_d; + thrust::device_vector mappings_d; + // i = i_iter * ncoords_to_copy_cumul_iter + i_coord_to_copy + thrust::device_vector coordinates_to_copy_d; + thrust::device_vector mappings_to_copy_d; + // i = i_iter * nsubs_cumul_iter + isub + thrust::device_vector state_sub_idx_d; }; class FDMTGPU : public FDMT { @@ -25,28 +46,6 @@ class FDMTGPU : public FDMT { FDMTPlanD m_fdmt_plan_d; - FDMTPlanD transfer_plan_to_device() { - const auto& plan = get_plan(); - FDMTPlanD plan_d; - for (const auto& state_shape_iter : plan.state_shape) { - for (const auto& shape : state_shape_iter) { - plan_d.state_shape_d.push_back(shape); - } - } - // flatten sub_plan and transfer to device - for (const auto& sub_plan_iter : plan.sub_plan) { - for (const auto& sub_plan : sub_plan_iter) { - plan_d.state_idx_d.push_back(sub_plan.state_idx); - for (const auto& dt : sub_plan.dt_grid) { - plan_d.dt_grid_d.push_back(dt); - } - for (const auto& dt_tuple : sub_plan.dt_plan) { - for (const auto& idt : dt_tuple) { - plan_d.dt_plan_d.push_back(idt); - } - } - } - } - return plan_d; - }; + static void transfer_plan_to_device(const FDMTPlan& plan, + FDMTPlanD& plan_d); }; \ No newline at end of file diff --git a/lib/dmt/fdmt_utils.hpp b/lib/dmt/fdmt_utils.hpp index 690c865..85bd47c 100644 --- a/lib/dmt/fdmt_utils.hpp +++ b/lib/dmt/fdmt_utils.hpp @@ -5,8 +5,8 @@ constexpr float kDispCoeff = -2.0; constexpr float kDispConstLK = 4.1488080e3; // L&K Handbook of Pulsar Astronomy -constexpr float kDispConstMT = 4.1493774e3; // TEMPO2, Manchester&Taylor (1972) -constexpr float kDispConstSI = 4.1488066e3; // SI value, Kulkarni (2020) +constexpr float kDispConstMT = 1 / 2.41e-4; // TEMPO2, Manchester&Taylor (1972) +constexpr float kDispConstSI = 4.1488064e3; // SI value, Kulkarni (2020) constexpr float kDispConst = kDispConstMT; namespace fdmt { diff --git a/lib/fdmt_gpu.cu b/lib/fdmt_gpu.cu index 3380ea7..322087f 100644 --- a/lib/fdmt_gpu.cu +++ b/lib/fdmt_gpu.cu @@ -1,2 +1,24 @@ +#include + #include +FDMTGPU::FDMTGPU(float f_min, float f_max, size_t nchans, size_t nsamps, + float tsamp, size_t dt_max, size_t dt_step, size_t dt_min) + : FDMT(f_min, f_max, nchans, nsamps, tsamp, dt_max, dt_step, dt_min) { + // Allocate memory for the state buffers + const auto& plan = get_plan(); + const auto state_size = plan.state_shape[0][3] * plan.state_shape[0][4]; + m_state_in_d.resize(state_size, 0.0F); + m_state_out_d.resize(state_size, 0.0F); + transfer_plan_to_device(plan, m_fdmt_plan_d); +} + +void FDMTGPU::transfer_plan_to_device(const FDMTPlan& plan, FDMTPlanD& plan_d) { + // Transfer the plan to the device + const auto niter = plan.state_shape.size(); + plan_d.state_shape_d.resize(niter); + for (size_t i = 0; i < niter; ++i) { + const auto& shape = plan.state_shape[i]; + plan_d.state_shape_d[i] = make_int4(shape[0], shape[1], shape[2], shape[3]); + } +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b63b73c..8f3c82a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build" [project] name = "dmt" -version = "0.1.0" +version = "0.2.0" description = "Dispersion Measure Transforms" readme = "README.md" license = { text = "MIT" }