Skip to content

Commit

Permalink
first
Browse files Browse the repository at this point in the history
  • Loading branch information
Iourick committed May 22, 2024
1 parent 6931117 commit cd61edc
Show file tree
Hide file tree
Showing 8 changed files with 1,749 additions and 57 deletions.
9 changes: 6 additions & 3 deletions include/dmt/fdmt_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,18 @@ class FDMT {
virtual ~FDMT() = default;
float get_df() const;
float get_correction() const;
int get_m_nsamps() const;
SizeType get_niters() const;
const FDMTPlan& get_plan() const;
const DtGridType& get_dt_grid_final() const;
std::vector<float> get_dm_grid_final() const;
static void set_log_level(int level);
virtual void execute(const float* waterfall, size_t waterfall_size,
float* dmt, size_t dmt_size) = 0;
virtual void initialise(const float* waterfall, float* state) = 0;
virtual void execute(const float* __restrict waterfall, size_t waterfall_size,
float* __restrict dmt, size_t dmt_size) = 0;
virtual void initialise(const float* __restrict waterfall, float* __restrict state) = 0;



protected:
void check_inputs(size_t waterfall_size, size_t dmt_size) const;

Expand Down
8 changes: 4 additions & 4 deletions include/dmt/fdmt_cpu.hpp
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
#pragma once

#include <dmt/fdmt_base.hpp>
#include <fdmt_base.hpp>

class FDMTCPU : public FDMT {
public:
FDMTCPU(float f_min, float f_max, size_t nchans, size_t nsamps, float tsamp,
size_t dt_max, size_t dt_step = 1, size_t dt_min = 0);
static void set_num_threads(int nthreads);
void execute(const float* waterfall, size_t waterfall_size, float* dmt,
void execute(const float* __restrict waterfall, size_t waterfall_size, float* __restrict dmt,
size_t dmt_size) override;
void initialise(const float* waterfall, float* state) override;
void initialise(const float* __restrict waterfall, float* __restrict state) override;

private:
// Buffers
std::vector<float> m_state_in;
std::vector<float> m_state_out;

void execute_iter(const float* state_in, float* state_out, SizeType i_iter);
void execute_iter(const float* __restrict state_in, float* __restrict state_out, SizeType i_iter);
};
126 changes: 126 additions & 0 deletions include/dmt/fdmt_gpu.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#pragma once

#include <thrust/device_vector.h>
#include <fdmt_base.hpp>

using StShapeTypeD = int4;
using FDMTCoordTypeD = int2;

// Plain-integer record describing one coordinate mapping: two int2 coordinates
// (head, tail) followed by a single int offset, i.e. 5 ints in total.
// NOTE(review): presumably the device-friendly counterpart of the host-side
// FDMTCoordMapping — confirm against fdmt_base.hpp.
// NOTE(review): fdmt_v1_gpu.cuh defines a struct with this same name and the
// same members; including both headers in one translation unit would be a
// redefinition error.
struct FDMTCoordMappingD {
FDMTCoordTypeD head; // int2, see the FDMTCoordTypeD alias
FDMTCoordTypeD tail; // int2, see the FDMTCoordTypeD alias
int offset;
};

// Flattened form of the host FDMTPlan tables.
// Every member suffixed `_d` is a thrust::device_vector<int>; every member
// suffixed `_h` is a host-side std::vector<int>.
struct FDMTPlanD
{
// Analogue of std::vector<std::vector<FDMTCoordType>> coordinates.
// "coordinates_d" is the flattened vector "coordinates".
// The cumulative-sum vector declared right after it stores the running totals
// of the flattened lengths of the inner vectors of "coordinates"
// (2 ints per coordinate):
//   cumsum[0] = 0
//   cumsum[1] = cumsum[0] + coordinates[0].size() * 2
//   ...
//   cumsum[n] = cumsum[n-1] + coordinates[n-1].size() * 2
//   ...
// Its size is always NumIter + 1.
thrust::device_vector<int> coordinates_d;
std::vector<int> lenof_innerVects_coords_cumsum_h;

// Analogous to the previous pair (same flattening and cumulative-sum scheme).
thrust::device_vector<int> coordinates_to_copy_d;
std::vector<int> lenof_innerVects_coords_to_copy_cumsum_h;


// Analogue of std::vector<std::vector<FDMTCoordMapping>> mappings.
// Each FDMTCoordMapping consists of 5 elements.
// "mappings_d" is the flattened vector "mappings".
// The cumulative-sum vector declared right after it stores the running totals
// of the flattened lengths of the inner vectors of "mappings":
//   cumsum[0] = 0
//   cumsum[1] = cumsum[0] + mappings[0].size() * 5
//   ...
//   cumsum[n] = cumsum[n-1] + mappings[n-1].size() * 5
//   ...
// Its size is always NumIter + 1.
thrust::device_vector<int> mappings_d;
std::vector<int> len_mappings_cumsum_h;


// Analogous to the previous pair (same flattening and cumulative-sum scheme).
thrust::device_vector<int> mappings_to_copy_d;
std::vector<int> len_mappings_to_copy_cumsum_h;


// Analogue of state_sub_idx.
// Original note: state_sub_idx_d.size() = m_niters + 1.
// NOTE(review): a cumulative-sum table accompanies it, which suggests a
// flattened nested vector — confirm the stated size.
thrust::device_vector<int>state_sub_idx_d;
std::vector<int> len_state_sub_idx_cumsum_h;


// Analogue of dt_grid.
// Original note: dt_grid_d.size() = m_niters + 1.
// NOTE(review): pos_gridInnerVects_d / pos_gridSubVects_h presumably hold start
// positions of the inner / per-iteration sub-vectors inside dt_grid_d — TODO confirm.
thrust::device_vector<int>dt_grid_d;
thrust::device_vector<int> pos_gridInnerVects_d;
std::vector<int> pos_gridSubVects_h;

};
// FDMT subclass that keeps its state buffers and a flattened plan (FDMTPlanD)
// in thrust device vectors. Only declarations live in this header.
// NOTE(review): presumably the CUDA backend parallel to FDMTCPU — confirm
// against the .cu definitions.
class FDMTGPU : public FDMT {
public:
// Constructor; dt_step defaults to 1 and dt_min defaults to 0.
FDMTGPU(float f_min, float f_max, size_t nchans, size_t nsamps, float tsamp,
size_t dt_max, size_t dt_step = 1, size_t dt_min = 0);
// Overrides FDMT::execute. Both pointers are __restrict-qualified, so callers
// must pass non-overlapping buffers.
// NOTE(review): whether waterfall/dmt are host or device pointers is not
// visible from this declaration — confirm.
void execute(const float* __restrict waterfall, size_t waterfall_size, float* __restrict dmt,
size_t dmt_size) override;




private:
// Device-resident float buffers.
// NOTE(review): presumably the input/output state buffers swapped between
// iterations (cf. m_state_in / m_state_out in FDMTCPU) — confirm.
thrust::device_vector<float> m_state_in_d;
thrust::device_vector<float> m_state_out_d;
// NOTE(review): presumably a device copy of the sample count — confirm.
thrust::device_vector<int> m_nsamps_d;

// Flattened plan tables used by this instance.
FDMTPlanD m_fdmt_plan_d;

// Static helper taking the host plan by const reference and the flattened
// plan by mutable reference.
// NOTE(review): presumably fills plan_d from plan — confirm.
static void transfer_plan_to_device(const FDMTPlan& plan,
FDMTPlanD& plan_d);
// Overrides FDMT::initialise; declared in the private section of this class.
void initialise(const float* __restrict waterfall, float* __restrict state) override;

};

// Takes the nested per-iteration FDMTCoordMapping vectors and returns a single
// std::vector<SizeType>. Declaration only.
// NOTE(review): presumably emits 5 values per mapping, matching the layout
// described in the FDMTPlanD comments — confirm against the definition.
std::vector<SizeType> flatten_mappings(const std::vector<std::vector<FDMTCoordMapping>>& mappings);

// CUDA kernel declaration (definition not in this header).
// `waterfall` is const; `state` is the writable float buffer; `nsamps` is
// passed by value. All pointer parameters are __restrict-qualified, so callers
// must pass non-overlapping buffers.
// NOTE(review): presumably the device counterpart of FDMT::initialise, using
// the state_sub_idx / dt_grid / pos_gridInnerVects tables of FDMTPlanD — confirm.
__global__
void kernel_init_fdmt(const float* __restrict waterfall, int* __restrict p_state_sub_idx
, int* __restrict p_dt_grid, int* __restrict p_pos_gridInnerVects, float* __restrict state, const int nsamps);

// CUDA kernel declaration with the same parameter list as kernel_init_fdmt
// (definition not in this header). All pointer parameters are
// __restrict-qualified, so callers must pass non-overlapping buffers.
// NOTE(review): presumably an alternative implementation of the same
// initialisation step — confirm how it differs.
__global__
void kernel_init_fdmt_v1(const float* __restrict waterfall, int* __restrict p_state_sub_idx
, int* __restrict p_dt_grid, int* __restrict p_pos_gridInnerVects, float* __restrict state, const int nsamps);

// CUDA kernel declaration (definition not in this header).
// `state_in` is const; `state_out` is the writable float buffer. `nsamps` and
// the two *_size arguments are passed by value. All pointer parameters are
// __restrict-qualified, so callers must pass non-overlapping buffers.
// NOTE(review): the p*_cur / p*_prev pointers presumably address the
// current / previous iteration's slice of the flattened FDMTPlanD tables, with
// coords_cur_size / coords_copy_cur_size giving element counts — confirm.
// NOTE(review): the int* tables are not const-qualified although their names
// suggest read-only lookup data — confirm.
__global__
void kernel_execute_iter(const float* __restrict state_in, float* __restrict state_out
, int* __restrict pcoords_cur//+
, int* __restrict pmappings_cur //+
, int* __restrict pcoords_copy_cur //+
, int* __restrict pmappings_copy_cur //+
, int* __restrict pstate_sub_idx_cur //+
, int* __restrict pstate_sub_idx_prev//+
, int nsamps //+
, int coords_cur_size//+
, int coords_copy_cur_size//+
);


// CUDA kernel declaration with the same parameter list as kernel_execute_iter
// (definition not in this header). All pointer parameters are
// __restrict-qualified, so callers must pass non-overlapping buffers.
// NOTE(review): presumably an alternative implementation of one iteration
// step — confirm how it differs.
__global__
void kernel_execute_iter_v1(const float* __restrict state_in, float* __restrict state_out
, int* __restrict pcoords_cur//+
, int* __restrict pmappings_cur //+
, int* __restrict pcoords_copy_cur //+
, int* __restrict pmappings_copy_cur //+
, int* __restrict pstate_sub_idx_cur //+
, int* __restrict pstate_sub_idx_prev//+
, int nsamps //+
, int coords_cur_size//+
, int coords_copy_cur_size//+
);
149 changes: 149 additions & 0 deletions include/dmt/fdmt_v1_gpu.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#pragma once

#include <thrust/device_vector.h>

#include <fdmt_base.hpp>

using StShapeTypeD = int4;
using FDMTCoordTypeD = int2;

// Plain-integer record describing one coordinate mapping: two int2 coordinates
// (head, tail) followed by a single int offset, i.e. 5 ints in total.
// NOTE(review): presumably the device-friendly counterpart of the host-side
// FDMTCoordMapping — confirm against fdmt_base.hpp.
// NOTE(review): fdmt_gpu.cuh defines a struct with this same name and the same
// members; including both headers in one translation unit would be a
// redefinition error.
struct FDMTCoordMappingD {
FDMTCoordTypeD head; // int2, see the FDMTCoordTypeD alias
FDMTCoordTypeD tail; // int2, see the FDMTCoordTypeD alias
int offset;
};

// Flattened form of the host FDMTPlan tables. Every member is a
// thrust::device_vector<int> except pos_gridSubVects_h, which is a host-side
// std::vector<int>.
struct FDMTPlan_D
{

// Analogue of std::vector<std::vector<FDMTCoordType>> coordinates.
// "coordinates_d" is the flattened vector "coordinates".
// The cumulative-sum vector declared right after it stores the running totals
// of the flattened lengths of the inner vectors of "coordinates"
// (2 ints per coordinate):
//   cumsum[0] = 0
//   cumsum[1] = cumsum[0] + coordinates[0].size() * 2
//   ...
//   cumsum[n] = cumsum[n-1] + coordinates[n-1].size() * 2
//   ...
// Its size is always NumIter + 1.
thrust::device_vector<int> coordinates_d;
thrust::device_vector<int> lenof_innerVects_coords_cumsum_d;

// Analogous to the previous pair (same flattening and cumulative-sum scheme).
thrust::device_vector<int> coordinates_to_copy_d;
thrust::device_vector<int> lenof_innerVects_coords_to_copy_cumsum_d;


// Analogue of std::vector<std::vector<FDMTCoordMapping>> mappings.
// Each FDMTCoordMapping consists of 5 elements.
// "mappings_d" is the flattened vector "mappings".
// The cumulative-sum vector declared right after it stores the running totals
// of the flattened lengths of the inner vectors of "mappings":
//   cumsum[0] = 0
//   cumsum[1] = cumsum[0] + mappings[0].size() * 5
//   ...
//   cumsum[n] = cumsum[n-1] + mappings[n-1].size() * 5
//   ...
// Its size is always NumIter + 1.
thrust::device_vector<int> mappings_d;
thrust::device_vector<int> len_mappings_cumsum_d;


// Analogous to the previous pair (same flattening and cumulative-sum scheme).
thrust::device_vector<int> mappings_to_copy_d;
thrust::device_vector<int> len_mappings_to_copy_cumsum_d;


// Analogue of state_sub_idx.
// Original note: state_sub_idx_d.size() = m_niters + 1.
// NOTE(review): a cumulative-sum table accompanies it, which suggests a
// flattened nested vector — confirm the stated size.
thrust::device_vector<int>state_sub_idx_d;
thrust::device_vector<int> len_state_sub_idx_cumsum_d;


// Analogue of dt_grid.
// Original note: dt_grid_d.size() = m_niters + 1.
// NOTE(review): pos_gridInnerVects_d / pos_gridSubVects_h presumably hold start
// positions of the inner / per-iteration sub-vectors inside dt_grid_d — TODO confirm.
thrust::device_vector<int>dt_grid_d;
thrust::device_vector<int> pos_gridInnerVects_d;
std::vector<int> pos_gridSubVects_h;

};
// FDMT subclass that keeps its state buffers and a flattened plan (FDMTPlan_D)
// in thrust device vectors. Only declarations live in this header.
// NOTE(review): presumably a variant of the CUDA backend whose cumulative-sum
// tables also live on the device (see FDMTPlan_D) — confirm against the .cu
// definitions.
class FDMT_v1_GPU : public FDMT {
public:
// Constructor; dt_step defaults to 1 and dt_min defaults to 0.
FDMT_v1_GPU(float f_min, float f_max, size_t nchans, size_t nsamps, float tsamp,
size_t dt_max, size_t dt_step = 1, size_t dt_min = 0);
// Overrides FDMT::execute. Both pointers are __restrict-qualified, so callers
// must pass non-overlapping buffers.
// NOTE(review): whether waterfall/dmt are host or device pointers is not
// visible from this declaration — confirm.
void execute(const float* __restrict waterfall, size_t waterfall_size, float* __restrict dmt,
size_t dmt_size) override;




private:
// Device-resident float buffers.
// NOTE(review): presumably the input/output state buffers swapped between
// iterations — confirm.
thrust::device_vector<float> m_state_in_d;
thrust::device_vector<float> m_state_out_d;

// Flattened plan tables used by this instance.
FDMTPlan_D m_fdmt_plan_d;
// NOTE(review): presumably a device copy of the sample count, matching the
// `int* pnsamps` kernel parameters declared in this header — confirm.
thrust::device_vector<int> m_nsamps_d;

// Static helper taking the host plan by const reference and the flattened
// plan by mutable reference.
// NOTE(review): presumably fills plan_d from plan — confirm.
static void transfer_plan_to_device(const FDMTPlan& plan,
FDMTPlan_D& plan_d);
// Overrides FDMT::initialise; declared in the private section of this class.
void initialise(const float* __restrict waterfall, float* __restrict state) override;

};

// Takes the nested per-iteration FDMTCoordMapping vectors and returns a single
// std::vector<SizeType>. Declaration only.
// NOTE(review): presumably emits 5 values per mapping, matching the layout
// described in the FDMTPlan_D comments — confirm against the definition.
std::vector<SizeType> flatten_mappings_(const std::vector<std::vector<FDMTCoordMapping>>& mappings);

// CUDA kernel declaration (definition not in this header).
// `waterfall` is const; `state` is the writable float buffer; `nsamps` is
// passed by value. All pointer parameters are __restrict-qualified, so callers
// must pass non-overlapping buffers.
// NOTE(review): presumably the device counterpart of FDMT::initialise, using
// the state_sub_idx / dt_grid / pos_gridInnerVects tables of FDMTPlan_D — confirm.
__global__
void kernel_init_fdmt_(const float* __restrict waterfall, int* __restrict p_state_sub_idx
, int* __restrict p_dt_grid, int* __restrict p_pos_gridInnerVects, float* __restrict state, const int nsamps);

// CUDA kernel declaration (definition not in this header).
// Same parameters as kernel_init_fdmt_ except that the sample count is passed
// through the pointer `pnsamps` instead of by value. The other pointer
// parameters are __restrict-qualified, so callers must pass non-overlapping
// buffers for them.
// NOTE(review): `pnsamps` must be dereferenceable from device code (presumably
// it points into m_nsamps_d) — confirm at the launch site.
__global__
void kernel_init_fdmt_v1_(const float* __restrict waterfall, int* __restrict p_state_sub_idx
, int* __restrict p_dt_grid, int* __restrict p_pos_gridInnerVects, float* __restrict state, int *pnsamps);

// CUDA kernel declaration (definition not in this header).
// `state_in` is const; `state_out` is the writable float buffer. All pointer
// parameters are __restrict-qualified, so callers must pass non-overlapping
// buffers.
// NOTE(review): `nsamps` is declared as `int &`. The CUDA C++ Programming
// Guide states that __global__ function parameters cannot be pass-by-reference;
// a reference bound to a host variable is not dereferenceable from device code.
// Consider passing it by value (as kernel_execute_iter in fdmt_gpu.cuh does) or
// through a device pointer (as the _v1_/_v2_ variants in this header do). Any
// such change must be applied to the definition and every launch site as well.
// NOTE(review): the p*_cur / p*_prev pointers presumably address the
// current / previous iteration's slice of the flattened FDMTPlan_D tables, with
// coords_cur_size / coords_copy_cur_size giving element counts — confirm.
__global__
void kernel_execute_iter_(const float* __restrict state_in, float* __restrict state_out
, int* __restrict pcoords_cur//+
, int* __restrict pmappings_cur //+
, int* __restrict pcoords_copy_cur //+
, int* __restrict pmappings_copy_cur //+
, int* __restrict pstate_sub_idx_cur //+
, int* __restrict pstate_sub_idx_prev//+
, int &nsamps //+
, int coords_cur_size//+
, int coords_copy_cur_size//+
);


// CUDA kernel declaration (definition not in this header).
// Same parameter list as kernel_execute_iter_ except that the sample count is
// passed through the pointer `pnsamps`. The other pointer parameters are
// __restrict-qualified, so callers must pass non-overlapping buffers for them.
// NOTE(review): `pnsamps` must be dereferenceable from device code (presumably
// it points into m_nsamps_d) — confirm at the launch site.
// NOTE(review): the p*_cur / p*_prev pointers presumably address the
// current / previous iteration's slice of the flattened FDMTPlan_D tables — confirm.
__global__
void kernel_execute_iter_v1_(const float* __restrict state_in, float* __restrict state_out
, int* __restrict pcoords_cur//+
, int* __restrict pmappings_cur //+
, int* __restrict pcoords_copy_cur //+
, int* __restrict pmappings_copy_cur //+
, int* __restrict pstate_sub_idx_cur //+
, int* __restrict pstate_sub_idx_prev//+
, int *pnsamps //+
, int coords_cur_size//+
, int coords_copy_cur_size//+
);

// CUDA kernel declaration (definition not in this header).
// Unlike the other execute kernels declared in this header, this one receives
// the iteration index `i_iter` together with each table and its matching
// *_cumsum table, rather than per-iteration pointers and sizes.
// `state_in` is const; `state_out` is the writable float buffer. All pointer
// parameters except `pnsamps` are __restrict-qualified, so callers must pass
// non-overlapping buffers for them.
// NOTE(review): presumably the kernel locates iteration i_iter's slice of each
// table through the corresponding cumsum table (see FDMTPlan_D) — confirm
// against the definition.
// NOTE(review): `pnsamps` must be dereferenceable from device code — confirm at
// the launch site.
__global__
void kernel_execute_iter_v2_(const float* __restrict state_in
, float* __restrict state_out
, const int i_iter
, int* __restrict pcoords
, int* __restrict pcoords_cumsum
, int* __restrict pcoords_to_copy
, int* __restrict pcoords_to_copy_cumsum
, int* __restrict pmappings
, int* __restrict pmappings_cumsum


, int* __restrict pmappings_copy
, int* __restrict pmappings_to_copy_cumsum
, int* __restrict pstate_sub_idx
, int* __restrict pstate_sub_idx_cumsum

, int* pnsamps //+
);

12 changes: 7 additions & 5 deletions lib/fdmt_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
#include <cstdint>
#include <numeric>
#include <spdlog/spdlog.h>

#include "dmt/fdmt_utils.hpp"
#include <dmt/fdmt_base.hpp>
#include <stdexcept> // For std::invalid_argument
#include "fdmt_utils.hpp"
#include <fdmt_base.hpp>

size_t FDMTPlan::calculate_memory_usage() const {
size_t mem_use = 0;
Expand Down Expand Up @@ -48,13 +48,15 @@ FDMT::FDMT(float f_min, float f_max, SizeType nchans, SizeType nsamps,
m_correction(m_df / 2),
m_niters(calculate_niters(m_nchans)) {
configure_fdmt_plan();
spdlog::debug("FDMT: df={}, dt_max={}, dt_min={}, dt_step={}, niters={}",
m_df, m_dt_max, m_dt_min, m_dt_step, m_niters);
/* spdlog::debug("FDMT: df={}, dt_max={}, dt_min={}, dt_step={}, niters={}",
m_df, m_dt_max, m_dt_min, m_dt_step, m_niters);*/
}

// Getters
// Accessor: hands back the stored m_df value.
float FDMT::get_df() const
{
    return m_df;
}
// Accessor: hands back the stored m_correction value.
float FDMT::get_correction() const
{
    return m_correction;
}
// Returns m_nsamps converted to int.
// NOTE(review): static_cast<int> narrows the stored value; it would wrap or
// truncate if m_nsamps ever exceeded INT_MAX — confirm callers stay below that.
int FDMT::get_m_nsamps() const { return static_cast<int>(m_nsamps); }

// Accessor: hands back the stored m_niters value.
SizeType FDMT::get_niters() const
{
    return m_niters;
}
// Accessor: exposes the stored m_fdmt_plan by const reference (no copy is made).
const FDMTPlan& FDMT::get_plan() const
{
    return m_fdmt_plan;
}
const DtGridType& FDMT::get_dt_grid_final() const {
Expand Down
Loading

0 comments on commit cd61edc

Please sign in to comment.