From fc897512a029cee3a57b299c9dd4b93ab6bbff02 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 11:13:34 -0500 Subject: [PATCH 01/31] Initial conversion of all FFT_ settings in the KOKKOS subdirectory to FFT_KOKKOS_ --- src/KOKKOS/fft3d_kokkos.cpp | 84 ++++++++++++++++++------------------ src/KOKKOS/fft3d_kokkos.h | 10 ++--- src/KOKKOS/fftdata_kokkos.h | 86 ++++++++++++++++++------------------- src/KOKKOS/pppm_kokkos.cpp | 4 +- src/KOKKOS/pppm_kokkos.h | 32 +++++++------- 5 files changed, 108 insertions(+), 108 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 82e4140f779..ca3d18e11a5 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -44,20 +44,20 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int int ngpus = lmp->kokkos->ngpus; ExecutionSpace execution_space = ExecutionSpaceFromDevice::space; -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) if (ngpus > 0 && execution_space == Device) lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs"); -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) if (ngpus > 0 && execution_space == Device) lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs"); -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs"); -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs"); -#elif defined(FFT_KISSFFT) +#elif defined(FFT_KOKKOS_KISSFFT) // The compiler can't statically determine the stack size needed for // recursive function calls in KISS FFT and the default per-thread // stack size on GPUs needs to be increased to prevent stack overflows @@ -149,20 +149,20 @@ struct norm_functor { KOKKOS_INLINE_FUNCTION void operator() (const int &i) 
const { -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT) +#if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; -#elif defined(FFT_MKL) +#elif defined(FFT_KOKKOS_MKL) d_out(i) *= norm; -#else // FFT_KISS +#else // FFT_KOKKOS_KISS d_out(i).re *= norm; d_out(i).im *= norm; #endif } }; -#ifdef FFT_KISSFFT +#ifdef FFT_KOKKOS_KISSFFT template struct kiss_fft_functor { public: @@ -219,19 +219,19 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total1; length = plan->length1; - #if defined(FFT_MKL) + #if defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_fast,d_data.data()); else DftiComputeBackward(plan->handle_fast,d_data.data()); - #elif defined(FFT_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - #elif defined(FFT_CUFFT) + #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); - #elif defined(FFT_HIPFFT) + #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = @@ -265,19 +265,19 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total2; length = plan->length2; - #if defined(FFT_MKL) + #if defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_mid,d_data.data()); else DftiComputeBackward(plan->handle_mid,d_data.data()); - #elif defined(FFT_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else 
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - #elif defined(FFT_CUFFT) + #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); - #elif defined(FFT_HIPFFT) + #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); @@ -309,19 +309,19 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total3; length = plan->length3; - #if defined(FFT_MKL) + #if defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_slow,d_data.data()); else DftiComputeBackward(plan->handle_slow,d_data.data()); - #elif defined(FFT_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - #elif defined(FFT_CUFFT) + #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); - #elif defined(FFT_HIPFFT) + #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); @@ -609,46 +609,46 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl // system specific pre-computation of 1d FFT coeffs // and scaling normalization -#if defined(FFT_MKL) - DftiCreateDescriptor( &(plan->handle_fast), FFT_MKL_PREC, DFTI_COMPLEX, 1, +#if defined(FFT_KOKKOS_MKL) + DftiCreateDescriptor( &(plan->handle_fast), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total1/nfast); DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE); 
DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast); -#if defined(FFT_MKL_THREADS) +#if defined(FFT_KOKKOS_MKL_THREADS) DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads); #endif DftiCommitDescriptor(plan->handle_fast); - DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1, + DftiCreateDescriptor( &(plan->handle_mid), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total2/nmid); DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid); -#if defined(FFT_MKL_THREADS) +#if defined(FFT_KOKKOS_MKL_THREADS) DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads); #endif DftiCommitDescriptor(plan->handle_mid); - DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1, + DftiCreateDescriptor( &(plan->handle_slow), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total3/nslow); DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow); -#if defined(FFT_MKL_THREADS) +#if defined(FFT_KOKKOS_MKL_THREADS) DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads); #endif DftiCommitDescriptor(plan->handle_slow); -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) -#if defined (FFT_FFTW_THREADS) +#if defined (FFT_KOKKOS_FFTW_THREADS) if (nthreads > 1) { FFTW_API(init_threads)(); FFTW_API(plan_with_nthreads)(nthreads); @@ -692,7 +692,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl nullptr,&nslow,1,plan->length3, FFTW_BACKWARD,FFTW_ESTIMATE); 
-#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, @@ -709,7 +709,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, CUFFT_TYPE,plan->total3/plan->length3); -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) hipfftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, @@ -726,7 +726,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, HIPFFT_TYPE,plan->total3/plan->length3); -#else /* FFT_KISS */ +#else /* FFT_KOKKOS_KISS */ kissfftKK = new KissFFTKokkos(); @@ -781,11 +781,11 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk if (plan->mid2_plan) remapKK->remap_3d_destroy_plan_kokkos(plan->mid2_plan); if (plan->post_plan) remapKK->remap_3d_destroy_plan_kokkos(plan->post_plan); -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) DftiFreeDescriptor(&(plan->handle_fast)); DftiFreeDescriptor(&(plan->handle_mid)); DftiFreeDescriptor(&(plan->handle_slow)); -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) FFTW_API(destroy_plan)(plan->plan_slow_forward); FFTW_API(destroy_plan)(plan->plan_slow_backward); FFTW_API(destroy_plan)(plan->plan_mid_forward); @@ -793,11 +793,11 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk FFTW_API(destroy_plan)(plan->plan_fast_forward); FFTW_API(destroy_plan)(plan->plan_fast_backward); -#if defined (FFT_FFTW_THREADS) +#if defined (FFT_KOKKOS_FFTW_THREADS) FFTW_API(cleanup_threads)(); #endif -#elif defined (FFT_KISSFFT) +#elif defined (FFT_KOKKOS_KISSFFT) delete kissfftKK; #endif @@ -855,7 +855,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // fftw3 and Dfti in MKL encode the number of transforms // into the plan, so we cannot operate on a smaller data set -#if defined(FFT_MKL) || defined(FFT_FFTW3) +#if defined(FFT_KOKKOS_MKL) || defined(FFT_KOKKOS_FFTW3) if ((total1 > nsize) || (total2 > nsize) || 
(total3 > nsize)) return; #endif @@ -866,7 +866,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // perform 1d FFTs in each of 3 dimensions // data is just an array of 0.0 -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) if (flag == -1) { DftiComputeForward(plan->handle_fast,d_data.data()); DftiComputeForward(plan->handle_mid,d_data.data()); @@ -876,7 +876,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ DftiComputeBackward(plan->handle_mid,d_data.data()); DftiComputeBackward(plan->handle_slow,d_data.data()); } -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) if (flag == -1) { FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); @@ -886,11 +886,11 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); } -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index a0489f69bbd..ed49c4b1eed 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -45,22 +45,22 @@ struct fft_plan_3d_kokkos { double norm; // normalization factor for rescaling // system specific 1d FFT info -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) DFTI_DESCRIPTOR 
*handle_fast; DFTI_DESCRIPTOR *handle_mid; DFTI_DESCRIPTOR *handle_slow; -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) FFTW_API(plan) plan_fast_forward; FFTW_API(plan) plan_fast_backward; FFTW_API(plan) plan_mid_forward; FFTW_API(plan) plan_mid_backward; FFTW_API(plan) plan_slow_forward; FFTW_API(plan) plan_slow_backward; -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) cufftHandle plan_fast; cufftHandle plan_mid; cufftHandle plan_slow; -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) hipfftHandle plan_fast; hipfftHandle plan_mid; hipfftHandle plan_slow; @@ -92,7 +92,7 @@ class FFT3dKokkos : protected Pointers { struct fft_plan_3d_kokkos *plan; RemapKokkos *remapKK; -#ifdef FFT_KISSFFT +#ifdef FFT_KOKKOS_KISSFFT KissFFTKokkos *kissfftKK; #endif diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index a3812a1cf0b..bed2c7faf0b 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -29,10 +29,10 @@ // Data types for single-precision complex -#if FFT_PRECISION == 1 -#elif FFT_PRECISION == 2 +#if FFT_KOKKOS_PRECISION == 1 +#elif FFT_KOKKOS_PRECISION == 2 #else -#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)" +#error "FFT_KOKKOS_PRECISION needs to be either 1 (=single) or 2 (=double)" #endif @@ -41,70 +41,70 @@ // FFTs here, since they may be valid in fft3d.cpp #ifdef KOKKOS_ENABLE_CUDA -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #elif defined(KOKKOS_ENABLE_HIP) -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) 
+# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #else -# if defined(FFT_CUFFT) -# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT" +# if defined(FFT_KOKKOS_CUFFT) +# error "Must enable CUDA with KOKKOS to use -DFFT_KOKKOS_CUFFT" # endif -# if defined(FFT_HIPFFT) -# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT" +# if defined(FFT_KOKKOS_HIPFFT) +# error "Must enable HIP with KOKKOS to use -DFFT_KOKKOS_HIPFFT" # endif // if user set FFTW, it means FFTW3 -# ifdef FFT_FFTW -# define FFT_FFTW3 +# ifdef FFT_KOKKOS_FFTW +# define FFT_KOKKOS_FFTW3 # endif -# ifdef FFT_FFTW_THREADS -# if !defined(FFT_FFTW3) -# error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS" +# ifdef FFT_KOKKOS_FFTW_THREADS +# if !defined(FFT_KOKKOS_FFTW3) +# error "Must use -DFFT_KOKKOS_FFTW3 with -DFFT_KOKKOS_FFTW_THREADS" # endif # endif #endif -#if defined(FFT_MKL) +#if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) typedef float _Complex FFT_DATA; - #define FFT_MKL_PREC DFTI_SINGLE + #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE #else typedef double _Complex FFT_DATA; - #define FFT_MKL_PREC DFTI_DOUBLE + #define FFT_KOKKOS_MKL_PREC DFTI_DOUBLE #endif -#elif defined(FFT_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) #include "fftw3.h" - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) typedef fftwf_complex FFT_DATA; #define FFTW_API(function) fftwf_ ## function #else typedef fftw_complex FFT_DATA; #define FFTW_API(function) fftw_ ## function #endif -#elif defined(FFT_CUFFT) +#elif defined(FFT_KOKKOS_CUFFT) #include "cufft.h" - #if defined(FFT_SINGLE) + #if 
defined(FFT_KOKKOS_SINGLE) #define cufftExec cufftExecC2C #define CUFFT_TYPE CUFFT_C2C typedef cufftComplex FFT_DATA; @@ -113,9 +113,9 @@ #define CUFFT_TYPE CUFFT_Z2Z typedef cufftDoubleComplex FFT_DATA; #endif -#elif defined(FFT_HIPFFT) +#elif defined(FFT_KOKKOS_HIPFFT) #include - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) #define hipfftExec hipfftExecC2C #define HIPFFT_TYPE HIPFFT_C2C typedef hipfftComplex FFT_DATA; @@ -125,7 +125,7 @@ typedef hipfftDoubleComplex FFT_DATA; #endif #else - #if defined(FFT_SINGLE) + #if defined(FFT_KOKKOS_SINGLE) #define kiss_fft_scalar float #else #define kiss_fft_scalar double @@ -134,13 +134,13 @@ kiss_fft_scalar re; kiss_fft_scalar im; } FFT_DATA; - #ifndef FFT_KISSFFT - #define FFT_KISSFFT + #ifndef FFT_KOKKOS_KISSFFT + #define FFT_KOKKOS_KISSFFT #endif #endif // (double[2]*) is not a 1D pointer -#if defined(FFT_FFTW3) +#if defined(FFT_KOKKOS_FFTW3) typedef FFT_SCALAR* FFT_DATA_POINTER; #else typedef FFT_DATA* FFT_DATA_POINTER; @@ -216,7 +216,7 @@ typedef struct FFTArrayTypes FFT_DAT; typedef struct FFTArrayTypes FFT_HAT; -#if defined(FFT_KISSFFT) +#if defined(FFT_KOKKOS_KISSFFT) #include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 912ae36f6f3..17a9c82bdb8 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -48,7 +48,7 @@ using namespace MathSpecialKokkos; enum{REVERSE_RHO}; enum{FORWARD_IK,FORWARD_IK_PERATOM}; -#ifdef FFT_SINGLE +#ifdef FFT_KOKKOS_SINGLE #define ZEROF 0.0f #define ONEF 1.0f #else @@ -2390,7 +2390,7 @@ void PPPMKokkos::compute_rho_coeff() s = 0.0; for (l = 0; l < j; l++) { a[l+1][k+order] = (a[l][k+1+order]-a[l][k-1+order]) / (l+1); -#ifdef FFT_SINGLE +#ifdef FFT_KOKKOS_SINGLE s += powf(0.5,(float) l+1) * (a[l][k-1+order] + powf(-1.0,(float) l) * a[l][k+1+order]) / (l+1); #else diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index d621313873f..14d4670dbdf 
100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -36,30 +36,30 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos); // fix up FFT defines for KOKKOS with CUDA and HIP #ifdef KOKKOS_ENABLE_CUDA -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #elif defined(KOKKOS_ENABLE_HIP) -# if defined(FFT_FFTW) -# undef FFT_FFTW +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW # endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 # endif -# if defined(FFT_MKL) -# undef FFT_MKL +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT +# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT # endif #endif From 0e98e706c69881f5562ce12285f80687ac8baf6c Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 11:49:54 -0500 Subject: [PATCH 02/31] Added LMPFFT settings for Kokkos-specific FFTs --- src/KOKKOS/lmpfftsettings_kokkos.h | 54 ++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 src/KOKKOS/lmpfftsettings_kokkos.h diff --git a/src/KOKKOS/lmpfftsettings_kokkos.h b/src/KOKKOS/lmpfftsettings_kokkos.h new file mode 100644 index 00000000000..148e001de38 --- /dev/null +++ b/src/KOKKOS/lmpfftsettings_kokkos.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories 
+ LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +// common FFT library related defines and compilation settings + +#ifndef LMP_FFT_KOKKOS_SETTINGS_H +#define LMP_FFT_KOKKOS_SETTINGS_H + +// if user set FFTW, it means FFTW3 + +#ifdef FFT_KOKKOS_FFTW +#ifndef FFT_KOKKOS_FFTW3 +#define FFT_KOKKOS_FFTW3 +#endif +#endif + +// set strings for library info output + +#if defined(FFT_KOKKOS_FFTW3) +#define LMP_FFT_KOKKOS_LIB "FFTW3" +#elif defined(FFT_KOKKOS_MKL) +#define LMP_FFT_KOKKOS_LIB "MKL FFT" +#elif defined(FFT_KOKKOS_CUFFT) +#define LMP_FFT_KOKKOS_LIB "cuFFT" +#elif defined(FFT_KOKKOS_HIPFFT) +#define LMP_FFT_KOKKOS_LIB "hipFFT" +#else +#define LMP_FFT_KOKKOS_LIB "KISS FFT" +#endif + +#ifdef FFT_KOKKOS_SINGLE +typedef float FFT_KOKKOS_SCALAR; +#define FFT_KOKKOS_PRECISION 1 +#define LMP_FFT_KOKKOS_PREC "single" +#define MPI_FFT_KOKKOS_SCALAR MPI_FLOAT +#else + +typedef double FFT_KOKKOS_SCALAR; +#define FFT_KOKKOS_PRECISION 2 +#define LMP_FFT_KOKKOS_PREC "double" +#define MPI_FFT_KOKKOS_SCALAR MPI_DOUBLE +#endif + +#endif From 48ef968dd21c4e474fdbf927a89b0585f5235568 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 12:01:06 -0500 Subject: [PATCH 03/31] Decoupled all variable typenames from the non-kokkos FFT --- src/KOKKOS/fft3d_kokkos.cpp | 120 +++++++++++++++++------------------ src/KOKKOS/fft3d_kokkos.h | 16 ++--- src/KOKKOS/fftdata_kokkos.h | 90 +++++++++++++------------- src/KOKKOS/grid3d_kokkos.cpp | 28 ++++---- src/KOKKOS/grid3d_kokkos.h | 14 ++-- src/KOKKOS/kissfft_kokkos.h | 76 +++++++++++----------- src/KOKKOS/kokkos_base_fft.h | 12 ++-- 
src/KOKKOS/pack_kokkos.h | 52 +++++++-------- src/KOKKOS/pppm_kokkos.cpp | 114 ++++++++++++++++----------------- src/KOKKOS/pppm_kokkos.h | 50 +++++++-------- src/KOKKOS/remap_kokkos.cpp | 20 +++--- src/KOKKOS/remap_kokkos.h | 20 +++--- 12 files changed, 306 insertions(+), 306 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index ca3d18e11a5..7093136fe6e 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,10 +88,10 @@ FFT3dKokkos::~FFT3dKokkos() /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, int flag) +void FFT3dKokkos::compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) { - typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2); - typename FFT_AT::t_FFT_DATA_1d d_out_data((FFT_DATA_POINTER)d_out.data(),d_out.size()/2); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,9 +99,9 @@ void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typ /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -140,17 +140,17 @@ template struct norm_functor { public: 
typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_DATA_1d_um d_out; + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_out; int norm; - norm_functor(typename FFT_AT::t_FFT_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { #if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) - FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); + FFT_KOKKOS_SCALAR* out_ptr = (FFT_KOKKOS_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; #elif defined(FFT_KOKKOS_MKL) @@ -167,14 +167,14 @@ template struct kiss_fft_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_DATA_1d_um d_data,d_tmp; + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() = default; - kiss_fft_functor(typename FFT_AT::t_FFT_DATA_1d &d_data_,typename FFT_AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -191,11 +191,11 @@ struct kiss_fft_functor { #endif template -void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, typename FFT_AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename FFT_AT::t_FFT_DATA_1d d_data,d_copy; - typename FFT_AT::t_FFT_SCALAR_1d 
d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -204,9 +204,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -226,16 +226,16 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_fast,d_data.data()); #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) - FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #elif 
defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else - typename FFT_AT::t_FFT_DATA_1d d_tmp = - typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -251,9 +251,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -272,15 +272,15 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_mid,d_data.data()); #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) - FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + 
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else @@ -295,9 +295,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -316,15 +316,15 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_slow,d_data.data()); #elif defined(FFT_KOKKOS_FFTW3) if (flag == 1) - FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else - 
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ -337,9 +337,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, // destination is always out if (plan->post_plan) { - d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -348,7 +348,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, // scaling if required if (flag == -1 && plan->scaled) { - FFT_SCALAR norm = plan->norm; + FFT_KOKKOS_SCALAR norm = plan->norm; int num = plan->normnum; norm_functor f(d_out,norm); @@ -443,7 +443,7 @@ struct fft_plan_3d_kokkos* 
FFT3dKokkos::fft_3d_create_pl plan->pre_plan = remapKK->remap_3d_create_plan_kokkos(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, first_ilo,first_ihi,first_jlo,first_jhi, - first_klo,first_khi,2,0,0,FFT_PRECISION, + first_klo,first_khi,2,0,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->pre_plan == nullptr) return nullptr; } @@ -468,7 +468,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl first_ilo,first_ihi,first_jlo,first_jhi, first_klo,first_khi, second_ilo,second_ihi,second_jlo,second_jhi, - second_klo,second_khi,2,1,0,FFT_PRECISION, + second_klo,second_khi,2,1,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->mid1_plan == nullptr) return nullptr; @@ -509,7 +509,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl second_jlo,second_jhi,second_klo,second_khi, second_ilo,second_ihi, third_jlo,third_jhi,third_klo,third_khi, - third_ilo,third_ihi,2,1,0,FFT_PRECISION, + third_ilo,third_ihi,2,1,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->mid2_plan == nullptr) return nullptr; @@ -537,7 +537,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl third_klo,third_khi,third_ilo,third_ihi, third_jlo,third_jhi, out_klo,out_khi,out_ilo,out_ihi, - out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION, + out_jlo,out_jhi,2,(permute+1)%3,0,FFT_KOKKOS_PRECISION, usecollective,usecuda_aware); if (plan->post_plan == nullptr) return nullptr; } @@ -599,11 +599,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename FFT_AT::t_FFT_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - plan->d_scratch = typename FFT_AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -697,34 +697,34 @@ struct 
fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - CUFFT_TYPE,plan->total1/plan->length1); + CUFFT_KOKKOS_TYPE,plan->total1/plan->length1); cufftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - CUFFT_TYPE,plan->total2/plan->length2); + CUFFT_KOKKOS_TYPE,plan->total2/plan->length2); cufftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - CUFFT_TYPE,plan->total3/plan->length3); + CUFFT_KOKKOS_TYPE,plan->total3/plan->length3); #elif defined(FFT_KOKKOS_HIPFFT) hipfftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - HIPFFT_TYPE,plan->total1/plan->length1); + HIPFFT_KOKKOS_TYPE,plan->total1/plan->length1); hipfftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - HIPFFT_TYPE,plan->total2/plan->length2); + HIPFFT_KOKKOS_TYPE,plan->total2/plan->length2); hipfftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - HIPFFT_TYPE,plan->total3/plan->length3); + HIPFFT_KOKKOS_TYPE,plan->total3/plan->length3); #else /* FFT_KOKKOS_KISS */ @@ -838,7 +838,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -878,13 +878,13 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ } #elif defined(FFT_KOKKOS_FFTW3) if (flag == -1) { - FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - 
FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); } else { - FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); } #elif defined(FFT_KOKKOS_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); @@ -896,8 +896,8 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; - typename FFT_AT::t_FFT_DATA_1d d_tmp = - typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); @@ -923,7 +923,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename 
FFT_AT::t_FFT_DATA_ // limit num to size of data if (flag == 1 && plan->scaled) { - FFT_SCALAR norm = plan->norm; + FFT_KOKKOS_SCALAR norm = plan->norm; int num = MIN(plan->normnum,nsize); norm_functor f(d_data,norm); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index ed49c4b1eed..bb552ec4efd 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -28,14 +28,14 @@ namespace LAMMPS_NS { template struct fft_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; struct remap_plan_3d_kokkos *pre_plan; // remap from input -> 1st FFTs struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename FFT_AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed) - typename FFT_AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -79,14 +79,14 @@ class FFT3dKokkos : protected Pointers { public: enum{FORWARD=1,BACKWARD=-1}; typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; FFT3dKokkos(class LAMMPS *, MPI_Comm, int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int,int); ~FFT3dKokkos() override; - void compute(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, int); - void timing1d(typename FFT_AT::t_FFT_SCALAR_1d, int, int); + void compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int); + void timing1d(typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -96,7 +96,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d, typename FFT_AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -105,7 +105,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index bed2c7faf0b..a9ea2de8963 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -20,10 +20,10 @@ // data types for 2d/3d FFTs -#ifndef LMP_FFT_DATA_KOKKOS_H -#define LMP_FFT_DATA_KOKKOS_H +#ifndef LMP_FFT_KOKKOS_DATA_H +#define LMP_FFT_KOKKOS_DATA_H -#include "lmpfftsettings.h" +#include "lmpfftsettings_kokkos.h" // ------------------------------------------------------------------------- @@ -87,42 +87,42 @@ #if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" #if defined(FFT_KOKKOS_SINGLE) - typedef float _Complex FFT_DATA; + typedef float _Complex FFT_KOKKOS_DATA; #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE #else - typedef double _Complex FFT_DATA; + typedef double _Complex FFT_KOKKOS_DATA; #define FFT_KOKKOS_MKL_PREC DFTI_DOUBLE #endif #elif defined(FFT_KOKKOS_FFTW3) #include "fftw3.h" #if defined(FFT_KOKKOS_SINGLE) - typedef fftwf_complex FFT_DATA; + typedef fftwf_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftwf_ ## function #else - typedef 
fftw_complex FFT_DATA; + typedef fftw_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftw_ ## function #endif #elif defined(FFT_KOKKOS_CUFFT) #include "cufft.h" #if defined(FFT_KOKKOS_SINGLE) #define cufftExec cufftExecC2C - #define CUFFT_TYPE CUFFT_C2C - typedef cufftComplex FFT_DATA; + #define CUFFT_KOKKOS_TYPE CUFFT_C2C + typedef cufftComplex FFT_KOKKOS_DATA; #else #define cufftExec cufftExecZ2Z - #define CUFFT_TYPE CUFFT_Z2Z - typedef cufftDoubleComplex FFT_DATA; + #define CUFFT_KOKKOS_TYPE CUFFT_Z2Z + typedef cufftDoubleComplex FFT_KOKKOS_DATA; #endif #elif defined(FFT_KOKKOS_HIPFFT) #include #if defined(FFT_KOKKOS_SINGLE) #define hipfftExec hipfftExecC2C - #define HIPFFT_TYPE HIPFFT_C2C - typedef hipfftComplex FFT_DATA; + #define HIPFFT_KOKKOS_TYPE HIPFFT_C2C + typedef hipfftComplex FFT_KOKKOS_DATA; #else #define hipfftExec hipfftExecZ2Z - #define HIPFFT_TYPE HIPFFT_Z2Z - typedef hipfftDoubleComplex FFT_DATA; + #define HIPFFT_KOKKOS_TYPE HIPFFT_Z2Z + typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else #if defined(FFT_KOKKOS_SINGLE) @@ -133,7 +133,7 @@ typedef struct { kiss_fft_scalar re; kiss_fft_scalar im; - } FFT_DATA; + } FFT_KOKKOS_DATA; #ifndef FFT_KOKKOS_KISSFFT #define FFT_KOKKOS_KISSFFT #endif @@ -141,9 +141,9 @@ // (double[2]*) is not a 1D pointer #if defined(FFT_KOKKOS_FFTW3) - typedef FFT_SCALAR* FFT_DATA_POINTER; + typedef FFT_KOKKOS_SCALAR* FFT_KOKKOS_DATA_POINTER; #else - typedef FFT_DATA* FFT_DATA_POINTER; + typedef FFT_KOKKOS_DATA* FFT_KOKKOS_DATA_POINTER; #endif @@ -154,23 +154,23 @@ template <> struct FFTArrayTypes { typedef Kokkos:: - DualView tdual_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um; + DualView tdual_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev t_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev_um t_FFT_KOKKOS_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d;
-typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; +typedef tdual_FFT_KOKKOS_SCALAR_2d::t_dev t_FFT_KOKKOS_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; -typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; +typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_dev t_FFT_KOKKOS_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_SCALAR_3d; -typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; +typedef tdual_FFT_KOKKOS_SCALAR_3d::t_dev t_FFT_KOKKOS_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um; + DualView tdual_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_dev t_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_dev_um t_FFT_KOKKOS_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -186,23 +186,23 @@ struct FFTArrayTypes { //Kspace typedef Kokkos:: - DualView tdual_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um; + DualView tdual_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host t_FFT_KOKKOS_SCALAR_1d; +typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host_um t_FFT_KOKKOS_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d; -typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; +typedef tdual_FFT_KOKKOS_SCALAR_2d::t_host t_FFT_KOKKOS_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; -typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; +typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_host t_FFT_KOKKOS_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_SCALAR_3d; -typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; 
+typedef tdual_FFT_KOKKOS_SCALAR_3d::t_host t_FFT_KOKKOS_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um; + DualView tdual_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_host t_FFT_KOKKOS_DATA_1d; +typedef tdual_FFT_KOKKOS_DATA_1d::t_host_um t_FFT_KOKKOS_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -212,12 +212,12 @@ typedef tdual_int_64::t_host_um t_int_64_um; }; #endif -typedef struct FFTArrayTypes FFT_DAT; -typedef struct FFTArrayTypes FFT_HAT; +typedef struct FFTArrayTypes FFT_KOKKOS_DAT; +typedef struct FFTArrayTypes FFT_KOKKOS_HAT; #if defined(FFT_KOKKOS_KISSFFT) -#include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last +#include "kissfft_kokkos.h" // uses t_FFT_KOKKOS_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/grid3d_kokkos.cpp b/src/KOKKOS/grid3d_kokkos.cpp index 9a82e0157db..0f8e0bdc4e7 100644 --- a/src/KOKKOS/grid3d_kokkos.cpp +++ b/src/KOKKOS/grid3d_kokkos.cpp @@ -636,7 +636,7 @@ void Grid3dKokkos::setup_comm_tiled(int &nbuf1, int &nbuf2) template void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -655,14 +655,14 @@ void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: forward_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + 
FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -709,13 +709,13 @@ forward_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -776,7 +776,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, template void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -795,14 +795,14 @@ void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -850,14 +850,14 @@ reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, template 
void Grid3dKokkos:: reverse_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_SCALAR* buf1; - FFT_SCALAR* buf2; + FFT_KOKKOS_SCALAR* buf1; + FFT_KOKKOS_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); diff --git a/src/KOKKOS/grid3d_kokkos.h b/src/KOKKOS/grid3d_kokkos.h index 19751d83c9c..8e9f6cd051f 100644 --- a/src/KOKKOS/grid3d_kokkos.h +++ b/src/KOKKOS/grid3d_kokkos.h @@ -27,16 +27,16 @@ class Grid3dKokkos : public Grid3d { enum { KSPACE = 0, PAIR = 1, FIX = 2 }; // calling classes typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int); Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); ~Grid3dKokkos() override; void forward_comm(int, void *, int, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void reverse_comm(int, void *, int, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); private: DAT::tdual_int_2d k_swap_packlist; @@ -57,13 +57,13 @@ class Grid3dKokkos : public Grid3d { void setup_comm_tiled(int &, int &) override; void forward_comm_kspace_brick(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, 
FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void forward_comm_kspace_tiled(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_brick(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_tiled(class KSpace *, int, int, - FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); void grow_swap() override; diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index 265677a21c7..fc23bf7891c 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -119,14 +119,14 @@ }while(0) */ -#define KISS_FFT_COS(phase) (FFT_SCALAR) cos(phase) -#define KISS_FFT_SIN(phase) (FFT_SCALAR) sin(phase) +#define KISS_FFT_KOKKOS_COS(phase) (FFT_KOKKOS_SCALAR) cos(phase) +#define KISS_FFT_KOKKOS_SIN(phase) (FFT_KOKKOS_SCALAR) sin(phase) #define HALF_OF(x) ((x)*.5) #define kf_cexp(x,x_index,phase) \ do{ \ - (x)(x_index).re = KISS_FFT_COS(phase);\ - (x)(x_index).im = KISS_FFT_SIN(phase);\ + (x)(x_index).re = KISS_FFT_KOKKOS_COS(phase);\ + (x)(x_index).im = KISS_FFT_KOKKOS_SIN(phase);\ }while(0) @@ -138,26 +138,26 @@ namespace LAMMPS_NS { template struct kiss_fft_state_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; int nfft; int inverse; - typename FFT_AT::t_int_64 d_factors; - typename FFT_AT::t_FFT_DATA_1d d_twiddles; - typename FFT_AT::t_FFT_DATA_1d d_scratch; + typename FFT_KOKKOS_AT::t_int_64 d_factors; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; + typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; }; template class KissFFTKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR t[2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -179,11 +179,11 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR scratch[6][2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; const size_t m3=3*m; @@ -237,14 +237,14 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR scratch[5][2]; - FFT_SCALAR epi3[2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR scratch[5][2]; + FFT_KOKKOS_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); epi3[0] = d_twiddles(fstride*m).re; epi3[1] = d_twiddles(fstride*m).im; @@ 
-289,13 +289,13 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; - FFT_SCALAR scratch[13][2]; - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR ya[2],yb[2]; + FFT_KOKKOS_SCALAR scratch[13][2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); ya[1] = d_twiddles(fstride*m).im; ya[0] = d_twiddles(fstride*m).re; @@ -369,15 +369,15 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly_generic(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_SCALAR t[2]; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_KOKKOS_SCALAR t[2]; int Norig = st.nfft; - typename FFT_AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; u &st, int Fout_count, int f_count, int factors_count) + const typename FFT_KOKKOS_AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos &st, int Fout_count, int f_count, int factors_count) { const int beg = Fout_count; const int p = d_factors[factors_count++]; /* the radix */ @@ -452,7 +452,7 @@ class KissFFTKokkos { p[i] * m[i] = m[i-1] m0 = n */ - static int kf_factor(int n, FFT_HAT::t_int_64 h_facbuf) + static int kf_factor(int n, FFT_KOKKOS_HAT::t_int_64 h_facbuf) { int p=4, nf=0; double floor_sqrt; @@ -496,12 +496,12 @@ class 
KissFFTKokkos { st.nfft = nfft; st.inverse = inverse_fft; - typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); - typename FFT_AT::tdual_FFT_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d(); + typename FFT_KOKKOS_AT::tdual_int_64 k_factors = typename FFT_KOKKOS_AT::tdual_int_64(); + typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d(); if (nfft > 0) { - k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft); + k_factors = typename FFT_KOKKOS_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); + k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,12 +524,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. 
// // It just performs an out-of-place FFT into a temp buffer - // typename FFT_AT::t_FFT_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { @@ -538,7 +538,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_DATA_1d_um d_fin, typename FFT_AT::t_FFT_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h index 08369b3c78e..567dc02ff3a 100644 --- a/src/KOKKOS/kokkos_base_fft.h +++ b/src/KOKKOS/kokkos_base_fft.h @@ -12,8 +12,8 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#ifndef LMP_KOKKOS_BASE_FFT_H -#define LMP_KOKKOS_BASE_FFT_H +#ifndef LMP_KOKKOS_BASE_FFT_KOKKOS_H +#define LMP_KOKKOS_BASE_FFT_KOKKOS_H #include "fftdata_kokkos.h" @@ -24,10 +24,10 @@ class KokkosBaseFFT { KokkosBaseFFT() {} // Kspace - virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; - virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; }; } diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index fe90d294a67..97d35afe268 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -38,13 +38,13 @@ template class PackKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; struct pack_3d_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -52,7 
+52,7 @@ struct pack_3d_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + pack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -80,7 +80,7 @@ struct pack_3d_functor { } }; -static void pack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -98,7 +98,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -106,7 +106,7 @@ struct unpack_3d_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -134,7 +134,7 @@ struct unpack_3d_functor { } }; -static 
void unpack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -153,7 +153,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -161,7 +161,7 @@ struct unpack_3d_permute1_1_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -189,7 +189,7 @@ struct unpack_3d_permute1_1_functor { } }; -static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -206,7 +206,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef 
ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -214,7 +214,7 @@ struct unpack_3d_permute1_2_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -243,7 +243,7 @@ struct unpack_3d_permute1_2_functor { } }; -static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -261,7 +261,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -270,7 +270,7 @@ struct unpack_3d_permute1_n_functor { int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um 
d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -299,7 +299,7 @@ struct unpack_3d_permute1_n_functor { } }; -static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -317,7 +317,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -325,7 +325,7 @@ struct unpack_3d_permute2_1_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -352,7 +352,7 @@ struct unpack_3d_permute2_1_functor { } }; -static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct 
pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -370,7 +370,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -378,7 +378,7 @@ struct unpack_3d_permute2_2_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -406,7 +406,7 @@ struct unpack_3d_permute2_2_functor { } }; -static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -423,7 +423,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; 
int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -432,7 +432,7 @@ struct unpack_3d_permute2_n_functor { int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -460,7 +460,7 @@ struct unpack_3d_permute2_n_functor { } }; -static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 17a9c82bdb8..3fc90c088d2 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -285,7 +285,7 @@ void PPPMKokkos::init() estimated_accuracy); mesg += fmt::format(" estimated relative force accuracy = {:.8g}\n", estimated_accuracy/two_charge_force); - mesg += " using " LMP_FFT_PREC " precision " LMP_FFT_LIB "\n"; + mesg += " using " LMP_FFT_KOKKOS_PREC " precision " LMP_FFT_KOKKOS_LIB "\n"; mesg += fmt::format(" 3d grid and FFT values/proc = {} {}\n", ngrid_max,nfft_both_max); utils::logmesg(lmp,mesg); @@ -582,7 +582,7 @@ void PPPMKokkos::compute(int eflag, int vflag) if (atom->nmax > nmax) { nmax = atomKK->nmax; d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = 
typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -595,8 +595,8 @@ void PPPMKokkos::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); + gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_KOKKOS_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); brick2fft(); // compute potential gradient on my FFT grid and @@ -609,14 +609,14 @@ void PPPMKokkos::compute(int eflag, int vflag) // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_KOKKOS_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_KOKKOS_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); // calculate the force on my particles @@ -730,8 +730,8 @@ void PPPMKokkos::allocate() npergrid = 3; - k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); // tally local grid sizes // ngrid = count of owned+ghost grid cells on this proc @@ -753,7 +753,7 @@ void PPPMKokkos::allocate() // allocate distributed grid data - 
d_density_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view(); @@ -775,17 +775,17 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); - k_rho_coeff = FFT_DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); + d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + k_rho_coeff = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -810,7 +810,7 @@ void PPPMKokkos::allocate() remap = new RemapKokkos(lmp,world, 
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION,collective_flag,gpu_aware_flag); + 1,0,0,FFT_KOKKOS_PRECISION,collective_flag,gpu_aware_flag); } /* ---------------------------------------------------------------------- @@ -847,14 +847,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = 
typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // use same GC ghost grid object for peratom grid communication @@ -862,8 +862,8 @@ void PPPMKokkos::allocate_peratom() npergrid = 7; - k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); } /* ---------------------------------------------------------------------- @@ -1234,14 +1234,14 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) const { // The density_brick array is atomic for Half/Thread neighbor style - Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; + Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; int nx = d_part2grid(i,0); int ny = d_part2grid(i,1); int nz = d_part2grid(i,2); - const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; @@ -1250,13 +1250,13 @@ void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) c compute_rho1d(i,dx,dy,dz); - const FFT_SCALAR z0 = delvolinv * q[i]; + const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; - const FFT_SCALAR y0 = 
z0*d_rho1d(i,n+order/2,2); + const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; - const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; a_density_brick(mz,my,mx) += x0*d_rho1d(i,l+order/2,0); @@ -1294,9 +1294,9 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team if ( ((nz+nlower-nzlo_out)*ix*iy >= ito) || ((nz+nupper-nzlo_out+1)*ix*iy < ifrom) ) continue; - const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; ny -= nylo_out; @@ -1304,15 +1304,15 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team compute_rho1d(i,dx,dy,dz); - const FFT_SCALAR z0 = delvolinv * q[i]; + const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; const int in = mz*ix*iy; - const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; const int im = in+my*ix; - const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; const int il = im+mx; @@ -2040,8 +2040,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_ik, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; + FFT_KOKKOS_SCALAR x0,y0,z0; + FFT_KOKKOS_SCALAR ekx,eky,ekz; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ 
-2100,8 +2100,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + FFT_KOKKOS_SCALAR dx,dy,dz,x0,y0,z0; + FFT_KOKKOS_SCALAR u,v0,v1,v2,v3,v4,v5; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2155,7 +2155,7 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2211,7 +2211,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_forward2, const int &i) con ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2269,7 +2269,7 @@ void PPPMKokkos::operator()(TagPPPM_unpack_forward2, const int &i) c ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int 
index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2299,7 +2299,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_reverse, const int &i) cons ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2332,11 +2332,11 @@ void PPPMKokkos::operator()(TagPPPM_unpack_reverse, const int &i) co template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::compute_rho1d(const int i, const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) const +void PPPMKokkos::compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &dx, const FFT_KOKKOS_SCALAR &dy, + const FFT_KOKKOS_SCALAR &dz) const { int k,l; - FFT_SCALAR r1,r2,r3; + FFT_KOKKOS_SCALAR r1,r2,r3; for (k = (1-order)/2; k <= order/2; k++) { r1 = r2 = r3 = ZEROF; @@ -2375,10 +2375,10 @@ template void PPPMKokkos::compute_rho_coeff() { int j,k,l,m; - FFT_SCALAR s; - FFT_SCALAR **a = new FFT_SCALAR *[order]; + FFT_KOKKOS_SCALAR s; + FFT_KOKKOS_SCALAR **a = new FFT_KOKKOS_SCALAR *[order]; for (int i = 0; i < order; ++i) - a[i] = new FFT_SCALAR[2*order+1]; + a[i] = new FFT_KOKKOS_SCALAR[2*order+1]; for (k = 0; k <= 2*order; k++) for (l = 0; l < order; l++) @@ -2586,18 +2586,18 @@ double PPPMKokkos::memory_usage() double bytes = (double)nmax*3 * sizeof(double); int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1); - bytes += (double)4 * nbrick * sizeof(FFT_SCALAR); + bytes += (double)4 * nbrick * sizeof(FFT_KOKKOS_SCALAR); if (triclinic) bytes += (double)3 * nfft_both * 
sizeof(double); bytes += (double)6 * nfft_both * sizeof(double); bytes += (double)nfft_both * sizeof(double); - bytes += (double)nfft_both*5 * sizeof(FFT_SCALAR); + bytes += (double)nfft_both*5 * sizeof(FFT_KOKKOS_SCALAR); if (peratom_allocate_flag) - bytes += (double)6 * nbrick * sizeof(FFT_SCALAR); + bytes += (double)6 * nbrick * sizeof(FFT_KOKKOS_SCALAR); // two Grid3d bufs - bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR); + bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_KOKKOS_SCALAR); return bytes; } diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 14d4670dbdf..dc0fbd88e52 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -131,7 +131,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; PPPMKokkos(class LAMMPS *); ~PPPMKokkos() override; @@ -350,7 +350,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename FFT_AT::t_FFT_SCALAR_1d_um d_buf; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; int unpack_offset; DAT::tdual_int_scalar k_flag; @@ -364,31 +364,31 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - typename FFT_AT::t_FFT_SCALAR_3d d_density_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_u_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename FFT_AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d 
d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; typename AT::t_float_1d d_fky; typename AT::t_float_1d d_fkz; - FFT_DAT::tdual_FFT_SCALAR_1d k_density_fft; - FFT_DAT::tdual_FFT_SCALAR_1d k_work1; - FFT_DAT::tdual_FFT_SCALAR_1d k_work2; - typename FFT_AT::t_FFT_SCALAR_1d d_density_fft; - typename FFT_AT::t_FFT_SCALAR_1d d_work1; - typename FFT_AT::t_FFT_SCALAR_1d d_work2; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_density_fft; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work1; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work2; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; - //FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename FFT_AT::t_FFT_SCALAR_2d_3 d_rho1d; - FFT_DAT::tdual_FFT_SCALAR_2d k_rho_coeff; - typename FFT_AT::t_FFT_SCALAR_2d d_rho_coeff; - FFT_HAT::t_FFT_SCALAR_2d h_rho_coeff; + //FFT_KOKKOS_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d k_rho_coeff; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; + FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; @@ -398,7 +398,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { RemapKokkos *remap; Grid3dKokkos *gc; - FFT_DAT::tdual_FFT_SCALAR_1d k_gc_buf1,k_gc_buf2; + FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_gc_buf1,k_gc_buf2; int ngc_buf1,ngc_buf2,npergrid; //int **part2grid; // storage for particle -> grid mapping @@ -429,17 +429,17 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { void fieldforce_peratom() override; 
KOKKOS_INLINE_FUNCTION - void compute_rho1d(const int i, const FFT_SCALAR &, const FFT_SCALAR &, - const FFT_SCALAR &) const; + void compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &, const FFT_KOKKOS_SCALAR &, + const FFT_KOKKOS_SCALAR &) const; void compute_rho_coeff(); void slabcorr() override; // grid communication - void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; - void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; // triclinic diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index efc6742a259..d6b8a5691c1 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -59,7 +59,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) { remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -103,7 
+103,7 @@ void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typ ------------------------------------------------------------------------- */ template -void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -111,7 +111,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d // use point-to-point communication int i,isend,irecv; - typename FFT_AT::t_FFT_SCALAR_1d d_scratch; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -120,20 +120,20 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d // post all recvs into scratch space - FFT_SCALAR* v_scratch = d_scratch.data(); + FFT_KOKKOS_SCALAR* v_scratch = d_scratch.data(); if (!plan->usecuda_aware) { plan->h_scratch = Kokkos::create_mirror_view(d_scratch); v_scratch = plan->h_scratch.data(); } for (irecv = 0; irecv < plan->nrecv; irecv++) { - FFT_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; + FFT_KOKKOS_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; MPI_Irecv(scratch,plan->recv_size[irecv], - MPI_FFT_SCALAR,plan->recv_proc[irecv],0, + MPI_FFT_KOKKOS_SCALAR,plan->recv_proc[irecv],0, plan->comm,&plan->request[irecv]); } - FFT_SCALAR* v_sendbuf = plan->d_sendbuf.data(); + FFT_KOKKOS_SCALAR* v_sendbuf = plan->d_sendbuf.data(); if (!plan->usecuda_aware) { plan->h_sendbuf = Kokkos::create_mirror_view(plan->d_sendbuf); v_sendbuf = plan->h_sendbuf.data(); @@ -149,7 +149,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d if (!plan->usecuda_aware) Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); - 
MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, + MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_KOKKOS_SCALAR, plan->send_proc[isend],0,plan->comm); } @@ -465,7 +465,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return nullptr; } @@ -475,7 +475,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return nullptr; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index a62c14f00b9..035b58260e8 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -27,14 +27,14 @@ namespace LAMMPS_NS { template struct remap_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends - FFT_HAT::t_FFT_SCALAR_1d h_sendbuf; // host buffer for MPI sends - typename FFT_AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs - FFT_HAT::t_FFT_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs - void (*pack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); + typedef FFTArrayTypes FFT_KOKKOS_AT; + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends + FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_sendbuf; // host buffer for MPI sends + typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs + 
void (*pack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - void (*unpack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -61,16 +61,16 @@ template class RemapKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_AT; + typedef FFTArrayTypes FFT_KOKKOS_AT; RemapKokkos(class LAMMPS *); RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos() override; - void perform(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d); + void perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From 51eebf311fde2f3848feae35ce3692a35d90fe9f Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Wed, 6 Dec 2023 12:40:11 -0500 Subject: [PATCH 04/31] Added fft settings to Install.sh, fixed typo in HIPFFT_C2C and HIPFFT_Z2Z --- src/KOKKOS/Install.sh | 1 + 
src/KOKKOS/fftdata_kokkos.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 489efc55a04..ba6c4ed427c 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -200,6 +200,7 @@ action kokkos_few.h action kokkos_type.h action kokkos.cpp action kokkos.h +action lmpfftsettings_kokkos.h action math_special_kokkos.cpp action math_special_kokkos.h action meam_dens_final_kokkos.h meam_dens_final.cpp diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index a9ea2de8963..d52bc0b968a 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -117,11 +117,11 @@ #include #if defined(FFT_KOKKOS_SINGLE) #define hipfftExec hipfftExecC2C - #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_C2C + #define HIPFFT_KOKKOS_TYPE HIPFFT_C2C typedef hipfftComplex FFT_KOKKOS_DATA; #else #define hipfftExec hipfftExecZ2Z - #define HIPFFT_KOKKOS_TYPE HIPFFT_KOKKOS_Z2Z + #define HIPFFT_KOKKOS_TYPE HIPFFT_Z2Z typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else From e80c3d3215a9de1f8e1c9041fc96c9713cb156ca Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 12 Dec 2023 12:20:24 -0700 Subject: [PATCH 05/31] Revert FFT_AT name change --- src/KOKKOS/fft3d_kokkos.cpp | 70 ++++++++++++++++++------------------- src/KOKKOS/fft3d_kokkos.h | 16 ++++----- src/KOKKOS/grid3d_kokkos.h | 2 +- src/KOKKOS/kissfft_kokkos.h | 52 +++++++++++++-------------- src/KOKKOS/pack_kokkos.h | 52 +++++++++++++-------------- src/KOKKOS/pppm_kokkos.cpp | 26 +++++++------- src/KOKKOS/pppm_kokkos.h | 24 ++++++------- src/KOKKOS/remap_kokkos.cpp | 10 +++--- src/KOKKOS/remap_kokkos.h | 16 ++++----- 9 files changed, 134 insertions(+), 134 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 7093136fe6e..9e7b87b8d8a 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,10 +88,10 @@ FFT3dKokkos::~FFT3dKokkos() /* 
---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) +void FFT3dKokkos::compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,9 +99,9 @@ void FFT3dKokkos::compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALA /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -140,11 +140,11 @@ template struct norm_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_out; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_out; int norm; - norm_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION @@ -167,14 +167,14 @@ template 
struct kiss_fft_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() = default; - kiss_fft_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -191,11 +191,11 @@ struct kiss_fft_functor { #endif template -void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -204,9 +204,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -234,8 +234,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -251,9 +251,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename 
FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -280,7 +280,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else @@ -295,9 +295,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -324,7 +324,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ -337,9 +337,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS // destination is always out if (plan->post_plan) { - d_data_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -599,11 +599,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - plan->d_scratch = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -838,7 +838,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ 
template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -896,8 +896,8 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FF hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = + typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index bb552ec4efd..9729bc6a63e 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -28,14 +28,14 @@ namespace LAMMPS_NS { template struct fft_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; struct remap_plan_3d_kokkos *pre_plan; // remap from input -> 1st FFTs struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs 
(times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -79,14 +79,14 @@ class FFT3dKokkos : protected Pointers { public: enum{FORWARD=1,BACKWARD=-1}; typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; FFT3dKokkos(class LAMMPS *, MPI_Comm, int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int,int); ~FFT3dKokkos() override; - void compute(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int); - void timing1d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); + void compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int); + void timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -96,7 +96,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -105,7 +105,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/grid3d_kokkos.h b/src/KOKKOS/grid3d_kokkos.h index 8e9f6cd051f..864ac19c060 100644 --- a/src/KOKKOS/grid3d_kokkos.h +++ b/src/KOKKOS/grid3d_kokkos.h @@ -27,7 +27,7 @@ class Grid3dKokkos : public Grid3d { enum { KSPACE = 0, PAIR = 1, FIX = 2 
}; // calling classes typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int); Grid3dKokkos(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index fc23bf7891c..66f32d29fb1 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -138,25 +138,25 @@ namespace LAMMPS_NS { template struct kiss_fft_state_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; int nfft; int inverse; - typename FFT_KOKKOS_AT::t_int_64 d_factors; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d d_scratch; + typename FFT_AT::t_int_64 d_factors; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; }; template class KissFFTKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -179,10 +179,10 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename 
FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; @@ -237,12 +237,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR scratch[5][2]; FFT_KOKKOS_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); @@ -289,12 +289,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; FFT_KOKKOS_SCALAR scratch[13][2]; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; FFT_KOKKOS_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); ya[1] = d_twiddles(fstride*m).im; @@ -369,15 +369,15 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly_generic(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; 
FFT_KOKKOS_SCALAR t[2]; int Norig = st.nfft; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; u &st, int Fout_count, int f_count, int factors_count) + const typename FFT_AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos &st, int Fout_count, int f_count, int factors_count) { const int beg = Fout_count; const int p = d_factors[factors_count++]; /* the radix */ @@ -496,12 +496,12 @@ class KissFFTKokkos { st.nfft = nfft; st.inverse = inverse_fft; - typename FFT_KOKKOS_AT::tdual_int_64 k_factors = typename FFT_KOKKOS_AT::tdual_int_64(); - typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d(); + typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); + typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d(); if (nfft > 0) { - k_factors = typename FFT_KOKKOS_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename FFT_KOKKOS_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); + k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); + k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,12 +524,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. 
// // It just performs an out-of-place FFT into a temp buffer - // typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { @@ -538,7 +538,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index 97d35afe268..5e014db020e 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -38,13 +38,13 @@ template class PackKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; struct pack_3d_functor { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -52,7 +52,7 @@ struct pack_3d_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d 
*plan): + pack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -80,7 +80,7 @@ struct pack_3d_functor { } }; -static void pack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -98,7 +98,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -106,7 +106,7 @@ struct unpack_3d_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -134,7 +134,7 @@ struct unpack_3d_functor { } }; -static void unpack_3d(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename 
FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -153,7 +153,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -161,7 +161,7 @@ struct unpack_3d_permute1_1_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -189,7 +189,7 @@ struct unpack_3d_permute1_1_functor { } }; -static void unpack_3d_permute1_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -206,7 +206,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in 
mid index @@ -214,7 +214,7 @@ struct unpack_3d_permute1_2_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -243,7 +243,7 @@ struct unpack_3d_permute1_2_functor { } }; -static void unpack_3d_permute1_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -261,7 +261,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -270,7 +270,7 @@ struct unpack_3d_permute1_n_functor { int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename 
FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -299,7 +299,7 @@ struct unpack_3d_permute1_n_functor { } }; -static void unpack_3d_permute1_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -317,7 +317,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -325,7 +325,7 @@ struct unpack_3d_permute2_1_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -352,7 +352,7 @@ struct unpack_3d_permute2_1_functor { } }; -static void unpack_3d_permute2_1(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, 
typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -370,7 +370,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -378,7 +378,7 @@ struct unpack_3d_permute2_2_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -406,7 +406,7 @@ struct unpack_3d_permute2_2_functor { } }; -static void unpack_3d_permute2_2(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -423,7 +423,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of 
elements in mid index @@ -432,7 +432,7 @@ struct unpack_3d_permute2_n_functor { int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute2_n_functor(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -460,7 +460,7 @@ struct unpack_3d_permute2_n_functor { } }; -static void unpack_3d_permute2_n(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 3fc90c088d2..ed7ace08c16 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -582,7 +582,7 @@ void PPPMKokkos::compute(int eflag, int vflag) if (atom->nmax > nmax) { nmax = atomKK->nmax; d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -753,7 +753,7 @@ void PPPMKokkos::allocate() // allocate distributed grid data - d_density_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename 
FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view(); @@ -775,16 +775,16 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); k_rho_coeff = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -847,14 +847,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename 
FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // use same GC ghost grid object for peratom grid communication diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index dc0fbd88e52..09513c9a2fb 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h 
@@ -131,7 +131,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; PPPMKokkos(class LAMMPS *); ~PPPMKokkos() override; @@ -350,7 +350,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; int unpack_offset; DAT::tdual_int_scalar k_flag; @@ -364,11 +364,11 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; @@ -377,17 +377,17 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_density_fft; FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work1; FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work2; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d 
d_density_fft; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; //FFT_KOKKOS_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d k_rho_coeff; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index d6b8a5691c1..18ba6264605 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -59,7 +59,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) { remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -103,7 +103,7 @@ void RemapKokkos::perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALA ------------------------------------------------------------------------- */ template -void RemapKokkos::remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -111,7 +111,7 @@ void 
RemapKokkos::remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKK // use point-to-point communication int i,isend,irecv; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -465,7 +465,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - plan->d_sendbuf = typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return nullptr; } @@ -475,7 +475,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return nullptr; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index 035b58260e8..ad5fa9833d2 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -27,14 +27,14 @@ namespace LAMMPS_NS { template struct remap_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_sendbuf; // host buffer for MPI sends - typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs - void (*pack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct 
pack_plan_3d *); + void (*pack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - void (*unpack)(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -61,16 +61,16 @@ template class RemapKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef FFTArrayTypes FFT_KOKKOS_AT; + typedef FFTArrayTypes FFT_AT; RemapKokkos(class LAMMPS *); RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos() override; - void perform(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d); + void perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_KOKKOS_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From 68c53886b8f52ec1f4af801783c87d368cef10e2 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 13 Dec 2023 15:09:02 -0700 Subject: [PATCH 06/31] Revert some name changes --- src/KOKKOS/Install.sh | 2 
+- src/KOKKOS/fft3d_kokkos.cpp | 88 +++++++++++----------- src/KOKKOS/fft3d_kokkos.h | 12 +-- src/KOKKOS/fftdata_kokkos.h | 80 ++++++++++---------- src/KOKKOS/grid3d_kokkos.cpp | 28 +++---- src/KOKKOS/grid3d_kokkos.h | 12 +-- src/KOKKOS/kissfft_kokkos.h | 64 ++++++++-------- src/KOKKOS/kokkos_base_fft.h | 12 +-- src/KOKKOS/lmpfftsettings_kokkos.h | 15 +--- src/KOKKOS/pack_kokkos.h | 48 ++++++------ src/KOKKOS/pppm_kokkos.cpp | 116 ++++++++++++++--------------- src/KOKKOS/pppm_kokkos.h | 48 ++++++------ src/KOKKOS/remap_kokkos.cpp | 20 ++--- src/KOKKOS/remap_kokkos.h | 16 ++-- 14 files changed, 275 insertions(+), 286 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ba6c4ed427c..2dcf49ce067 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -200,7 +200,7 @@ action kokkos_few.h action kokkos_type.h action kokkos.cpp action kokkos.h -action lmpfftsettings_kokkos.h +action lmpfftsettings_kokkos.h lmpfftsettings.h action math_special_kokkos.cpp action math_special_kokkos.h action meam_dens_final_kokkos.h meam_dens_final.cpp diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 9e7b87b8d8a..d78239606e1 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,10 +88,10 @@ FFT3dKokkos::~FFT3dKokkos() /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, int flag) +void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, int flag) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d 
d_out_data((FFT_KOKKOS_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,9 +99,9 @@ void FFT3dKokkos::compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_ /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_KOKKOS_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -141,21 +141,21 @@ struct norm_functor { public: typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_out; + typename FFT_AT::t_FFT_DATA_1d_um d_out; int norm; - norm_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_AT::t_FFT_DATA_1d &d_out_, int norm_): d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { #if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) - FFT_KOKKOS_SCALAR* out_ptr = (FFT_KOKKOS_SCALAR *)(d_out.data()+i); + FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; #elif defined(FFT_KOKKOS_MKL) d_out(i) *= norm; -#else // FFT_KOKKOS_KISS +#else // FFT_KISS d_out(i).re *= norm; d_out(i).im *= norm; #endif @@ -168,13 +168,13 @@ struct kiss_fft_functor { public: typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_data,d_tmp; + typename FFT_AT::t_FFT_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() = default; - kiss_fft_functor(typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_data_,typename FFT_AT::t_FFT_KOKKOS_DATA_1d &d_tmp_, 
kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_AT::t_FFT_DATA_1d &d_data_,typename FFT_AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -191,11 +191,11 @@ struct kiss_fft_functor { #endif template -void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_in, typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, typename FFT_AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data,d_copy; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_AT::t_FFT_DATA_1d d_data,d_copy; + typename FFT_AT::t_FFT_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -204,9 +204,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -234,8 +234,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename 
FFT_AT::t_FFT_KOKKOS_DATA_1 #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = - typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_DATA_1d d_tmp = + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); kiss_fft_functor f; if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -251,9 +251,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -280,7 +280,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); else @@ -295,9 
+295,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -324,7 +324,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 #elif defined(FFT_KOKKOS_HIPFFT) hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else - d_tmp = typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); else @@ -337,9 +337,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 // destination is always out if (plan->post_plan) { - d_data_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d((FFT_KOKKOS_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename 
FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -348,7 +348,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1 // scaling if required if (flag == -1 && plan->scaled) { - FFT_KOKKOS_SCALAR norm = plan->norm; + FFT_SCALAR norm = plan->norm; int num = plan->normnum; norm_functor f(d_out,norm); @@ -443,7 +443,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl plan->pre_plan = remapKK->remap_3d_create_plan_kokkos(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, first_ilo,first_ihi,first_jlo,first_jhi, - first_klo,first_khi,2,0,0,FFT_KOKKOS_PRECISION, + first_klo,first_khi,2,0,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->pre_plan == nullptr) return nullptr; } @@ -468,7 +468,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl first_ilo,first_ihi,first_jlo,first_jhi, first_klo,first_khi, second_ilo,second_ihi,second_jlo,second_jhi, - second_klo,second_khi,2,1,0,FFT_KOKKOS_PRECISION, + second_klo,second_khi,2,1,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->mid1_plan == nullptr) return nullptr; @@ -509,7 +509,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl second_jlo,second_jhi,second_klo,second_khi, second_ilo,second_ihi, third_jlo,third_jhi,third_klo,third_khi, - third_ilo,third_ihi,2,1,0,FFT_KOKKOS_PRECISION, + third_ilo,third_ihi,2,1,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->mid2_plan == nullptr) return nullptr; @@ -537,7 +537,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl third_klo,third_khi,third_ilo,third_ihi, third_jlo,third_jhi, out_klo,out_khi,out_ilo,out_ihi, - out_jlo,out_jhi,2,(permute+1)%3,0,FFT_KOKKOS_PRECISION, + 
out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION, usecollective,usecuda_aware); if (plan->post_plan == nullptr) return nullptr; } @@ -599,11 +599,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_AT::t_FFT_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - plan->d_scratch = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -697,17 +697,17 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - CUFFT_KOKKOS_TYPE,plan->total1/plan->length1); + CUFFT_TYPE,plan->total1/plan->length1); cufftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - CUFFT_KOKKOS_TYPE,plan->total2/plan->length2); + CUFFT_TYPE,plan->total2/plan->length2); cufftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - CUFFT_KOKKOS_TYPE,plan->total3/plan->length3); + CUFFT_TYPE,plan->total3/plan->length3); #elif defined(FFT_KOKKOS_HIPFFT) @@ -838,7 +838,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -896,8 +896,8 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKO hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_tmp = 
- typename FFT_AT::t_FFT_KOKKOS_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); + typename FFT_AT::t_FFT_DATA_1d d_tmp = + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); @@ -923,7 +923,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKO // limit num to size of data if (flag == 1 && plan->scaled) { - FFT_KOKKOS_SCALAR norm = plan->norm; + FFT_SCALAR norm = plan->norm; int num = MIN(plan->normnum,nsize); norm_functor f(d_data,norm); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index 9729bc6a63e..ed49c4b1eed 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -34,8 +34,8 @@ struct fft_plan_3d_kokkos { struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_copy; // memory for remap results (if needed) - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; // scratch space for remaps + typename FFT_AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -85,8 +85,8 @@ class FFT3dKokkos : protected Pointers { int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int,int); ~FFT3dKokkos() override; - void compute(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int); - void timing1d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, int, int); + void compute(typename FFT_AT::t_FFT_SCALAR_1d, 
typename FFT_AT::t_FFT_SCALAR_1d, int); + void timing1d(typename FFT_AT::t_FFT_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -96,7 +96,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d, typename FFT_AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -105,7 +105,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_KOKKOS_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index d52bc0b968a..15dca33bcc1 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -20,8 +20,8 @@ // data types for 2d/3d FFTs -#ifndef LMP_FFT_KOKKOS_DATA_H -#define LMP_FFT_KOKKOS_DATA_H +#ifndef LMP_FFT_DATA_KOKKOS_H +#define LMP_FFT_DATA_KOKKOS_H #include "lmpfftsettings_kokkos.h" @@ -29,10 +29,10 @@ // Data types for single-precision complex -#if FFT_KOKKOS_PRECISION == 1 -#elif FFT_KOKKOS_PRECISION == 2 +#if FFT_PRECISION == 1 +#elif FFT_PRECISION == 2 #else -#error "FFT_KOKKOS_PRECISION needs to be either 1 (=single) or 2 (=double)" +#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)" #endif @@ -86,7 +86,7 @@ #if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) typedef float _Complex FFT_KOKKOS_DATA; #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE #else @@ -95,7 +95,7 @@ #endif #elif defined(FFT_KOKKOS_FFTW3) #include "fftw3.h" - #if 
defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) typedef fftwf_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftwf_ ## function #else @@ -104,18 +104,18 @@ #endif #elif defined(FFT_KOKKOS_CUFFT) #include "cufft.h" - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) #define cufftExec cufftExecC2C - #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_C2C + #define CUFFT_TYPE CUFFT_C2C typedef cufftComplex FFT_KOKKOS_DATA; #else #define cufftExec cufftExecZ2Z - #define CUFFT_KOKKOS_TYPE CUFFT_KOKKOS_Z2Z + #define CUFFT_TYPE CUFFT_Z2Z typedef cufftDoubleComplex FFT_KOKKOS_DATA; #endif #elif defined(FFT_KOKKOS_HIPFFT) #include - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) #define hipfftExec hipfftExecC2C #define HIPFFT_KOKKOS_TYPE HIPFFT_C2C typedef hipfftComplex FFT_KOKKOS_DATA; @@ -125,7 +125,7 @@ typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else - #if defined(FFT_KOKKOS_SINGLE) + #if defined(FFT_SINGLE) #define kiss_fft_scalar float #else #define kiss_fft_scalar double @@ -141,7 +141,7 @@ // (double[2]*) is not a 1D pointer #if defined(FFT_KOKKOS_FFTW3) - typedef FFT_KOKKOS_SCALAR* FFT_KOKKOS_DATA_POINTER; + typedef FFT_SCALAR* FFT_KOKKOS_DATA_POINTER; #else typedef FFT_KOKKOS_DATA* FFT_KOKKOS_DATA_POINTER; #endif @@ -154,23 +154,23 @@ template <> struct FFTArrayTypes { typedef Kokkos:: - DualView tdual_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev t_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_dev_um t_FFT_KOKKOS_SCALAR_1d_um; + DualView tdual_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; -typedef tdual_FFT_KOKKOS_SCALAR_2d::t_dev t_FFT_KOKKOS_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d; +typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; -typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_dev 
t_FFT_KOKKOS_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; +typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; -typedef tdual_FFT_KOKKOS_SCALAR_3d::t_dev t_FFT_KOKKOS_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_SCALAR_3d; +typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_dev t_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_dev_um t_FFT_KOKKOS_DATA_1d_um; + DualView tdual_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -186,23 +186,23 @@ struct FFTArrayTypes { //Kspace typedef Kokkos:: - DualView tdual_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host t_FFT_KOKKOS_SCALAR_1d; -typedef tdual_FFT_KOKKOS_SCALAR_1d::t_host_um t_FFT_KOKKOS_SCALAR_1d_um; + DualView tdual_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d; -typedef tdual_FFT_KOKKOS_SCALAR_2d::t_host t_FFT_KOKKOS_SCALAR_2d; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d; +typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_2d_3; -typedef tdual_FFT_KOKKOS_SCALAR_2d_3::t_host t_FFT_KOKKOS_SCALAR_2d_3; +typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; +typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3; -typedef Kokkos::DualView tdual_FFT_KOKKOS_SCALAR_3d; -typedef tdual_FFT_KOKKOS_SCALAR_3d::t_host t_FFT_KOKKOS_SCALAR_3d; +typedef Kokkos::DualView tdual_FFT_SCALAR_3d; +typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_host t_FFT_KOKKOS_DATA_1d; -typedef tdual_FFT_KOKKOS_DATA_1d::t_host_um t_FFT_KOKKOS_DATA_1d_um; + DualView 
tdual_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um; typedef Kokkos:: DualView tdual_int_64; @@ -212,12 +212,12 @@ typedef tdual_int_64::t_host_um t_int_64_um; }; #endif -typedef struct FFTArrayTypes FFT_KOKKOS_DAT; -typedef struct FFTArrayTypes FFT_KOKKOS_HAT; +typedef struct FFTArrayTypes FFT_DAT; +typedef struct FFTArrayTypes FFT_HAT; #if defined(FFT_KOKKOS_KISSFFT) -#include "kissfft_kokkos.h" // uses t_FFT_KOKKOS_DATA_1d, needs to come last +#include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/grid3d_kokkos.cpp b/src/KOKKOS/grid3d_kokkos.cpp index 0f8e0bdc4e7..9a82e0157db 100644 --- a/src/KOKKOS/grid3d_kokkos.cpp +++ b/src/KOKKOS/grid3d_kokkos.cpp @@ -636,7 +636,7 @@ void Grid3dKokkos::setup_comm_tiled(int &nbuf1, int &nbuf2) template void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, + FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -655,14 +655,14 @@ void Grid3dKokkos::forward_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: forward_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -709,13 +709,13 @@ forward_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: 
forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -776,7 +776,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int which, int nper, template void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d& k_buf2, + FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2, MPI_Datatype datatype) { if (caller == KSPACE) { @@ -795,14 +795,14 @@ void Grid3dKokkos::reverse_comm(int caller, void *ptr, int which, in template void Grid3dKokkos:: reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int m; MPI_Request request; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); @@ -850,14 +850,14 @@ reverse_comm_kspace_brick(KSpace *kspace, int which, int nper, template void Grid3dKokkos:: reverse_comm_kspace_tiled(KSpace *kspace, int which, int nper, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf1, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf2, MPI_Datatype datatype) + FFT_DAT::tdual_FFT_SCALAR_1d &k_buf1, 
FFT_DAT::tdual_FFT_SCALAR_1d &k_buf2, MPI_Datatype datatype) { int i,m,offset; KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); - FFT_KOKKOS_SCALAR* buf1; - FFT_KOKKOS_SCALAR* buf2; + FFT_SCALAR* buf1; + FFT_SCALAR* buf2; if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); diff --git a/src/KOKKOS/grid3d_kokkos.h b/src/KOKKOS/grid3d_kokkos.h index 864ac19c060..19751d83c9c 100644 --- a/src/KOKKOS/grid3d_kokkos.h +++ b/src/KOKKOS/grid3d_kokkos.h @@ -34,9 +34,9 @@ class Grid3dKokkos : public Grid3d { ~Grid3dKokkos() override; void forward_comm(int, void *, int, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void reverse_comm(int, void *, int, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); private: DAT::tdual_int_2d k_swap_packlist; @@ -57,13 +57,13 @@ class Grid3dKokkos : public Grid3d { void setup_comm_tiled(int &, int &) override; void forward_comm_kspace_brick(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void forward_comm_kspace_tiled(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_brick(class KSpace *, int, int, - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void reverse_comm_kspace_tiled(class KSpace *, int, int, - 
FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, MPI_Datatype); + FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype); void grow_swap() override; diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index 66f32d29fb1..265677a21c7 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -119,14 +119,14 @@ }while(0) */ -#define KISS_FFT_KOKKOS_COS(phase) (FFT_KOKKOS_SCALAR) cos(phase) -#define KISS_FFT_KOKKOS_SIN(phase) (FFT_KOKKOS_SCALAR) sin(phase) +#define KISS_FFT_COS(phase) (FFT_SCALAR) cos(phase) +#define KISS_FFT_SIN(phase) (FFT_SCALAR) sin(phase) #define HALF_OF(x) ((x)*.5) #define kf_cexp(x,x_index,phase) \ do{ \ - (x)(x_index).re = KISS_FFT_KOKKOS_COS(phase);\ - (x)(x_index).im = KISS_FFT_KOKKOS_SIN(phase);\ + (x)(x_index).re = KISS_FFT_COS(phase);\ + (x)(x_index).im = KISS_FFT_SIN(phase);\ }while(0) @@ -142,8 +142,8 @@ struct kiss_fft_state_kokkos { int nfft; int inverse; typename FFT_AT::t_int_64 d_factors; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_twiddles; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d d_scratch; + typename FFT_AT::t_FFT_DATA_1d d_twiddles; + typename FFT_AT::t_FFT_DATA_1d d_scratch; }; template @@ -153,11 +153,11 @@ class KissFFTKokkos { typedef FFTArrayTypes FFT_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR t[2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -179,11 +179,11 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename 
FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR scratch[6][2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; const size_t m3=3*m; @@ -237,14 +237,14 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR scratch[5][2]; - FFT_KOKKOS_SCALAR epi3[2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR scratch[5][2]; + FFT_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); epi3[0] = d_twiddles(fstride*m).re; epi3[1] = d_twiddles(fstride*m).im; @@ -289,13 +289,13 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; - FFT_KOKKOS_SCALAR scratch[13][2]; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR ya[2],yb[2]; + FFT_SCALAR scratch[13][2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); ya[1] = d_twiddles(fstride*m).im; ya[0] = d_twiddles(fstride*m).re; @@ -369,15 +369,15 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_Fout, const size_t fstride, + static 
void kf_bfly_generic(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_twiddles = st.d_twiddles; - FFT_KOKKOS_SCALAR t[2]; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + FFT_SCALAR t[2]; int Norig = st.nfft; - typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; u &st, int Fout_count, int f_count, int factors_count) { @@ -452,7 +452,7 @@ class KissFFTKokkos { p[i] * m[i] = m[i-1] m0 = n */ - static int kf_factor(int n, FFT_KOKKOS_HAT::t_int_64 h_facbuf) + static int kf_factor(int n, FFT_HAT::t_int_64 h_facbuf) { int p=4, nf=0; double floor_sqrt; @@ -497,11 +497,11 @@ class KissFFTKokkos { st.inverse = inverse_fft; typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); - typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d(); + typename FFT_AT::tdual_FFT_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d(); if (nfft > 0) { k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename FFT_AT::tdual_FFT_KOKKOS_DATA_1d("kissfft:twiddles",nfft); + k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,12 +524,12 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. 
// // It just performs an out-of-place FFT into a temp buffer - // typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_KOKKOS_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // typename FFT_AT::t_FFT_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { @@ -538,7 +538,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fin, typename FFT_AT::t_FFT_KOKKOS_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_DATA_1d_um d_fin, typename FFT_AT::t_FFT_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h index 567dc02ff3a..08369b3c78e 100644 --- a/src/KOKKOS/kokkos_base_fft.h +++ b/src/KOKKOS/kokkos_base_fft.h @@ -12,8 +12,8 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#ifndef LMP_KOKKOS_BASE_FFT_KOKKOS_H -#define LMP_KOKKOS_BASE_FFT_KOKKOS_H +#ifndef LMP_KOKKOS_BASE_FFT_H +#define LMP_KOKKOS_BASE_FFT_H #include "fftdata_kokkos.h" @@ -24,10 +24,10 @@ class KokkosBaseFFT { KokkosBaseFFT() {} // Kspace - virtual void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; - virtual void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {}; }; } diff --git a/src/KOKKOS/lmpfftsettings_kokkos.h b/src/KOKKOS/lmpfftsettings_kokkos.h index 148e001de38..6cea9bb63a4 100644 --- a/src/KOKKOS/lmpfftsettings_kokkos.h +++ b/src/KOKKOS/lmpfftsettings_kokkos.h @@ -16,6 +16,8 @@ #ifndef LMP_FFT_KOKKOS_SETTINGS_H #define LMP_FFT_KOKKOS_SETTINGS_H +#include "lmpfftsettings.h" + // if user set FFTW, it means FFTW3 #ifdef FFT_KOKKOS_FFTW @@ -38,17 +40,4 @@ #define LMP_FFT_KOKKOS_LIB "KISS FFT" #endif -#ifdef FFT_KOKKOS_SINGLE -typedef float FFT_KOKKOS_SCALAR; -#define FFT_KOKKOS_PRECISION 1 -#define LMP_FFT_KOKKOS_PREC "single" -#define MPI_FFT_KOKKOS_SCALAR MPI_FLOAT -#else - -typedef double FFT_KOKKOS_SCALAR; -#define FFT_KOKKOS_PRECISION 2 -#define 
LMP_FFT_KOKKOS_PREC "double" -#define MPI_FFT_KOKKOS_SCALAR MPI_DOUBLE -#endif - #endif diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index 5e014db020e..fe90d294a67 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -44,7 +44,7 @@ struct pack_3d_functor { public: typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -52,7 +52,7 @@ struct pack_3d_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + pack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -80,7 +80,7 @@ struct pack_3d_functor { } }; -static void pack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -98,7 +98,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -106,7 +106,7 @@ struct unpack_3d_functor { int nstride_line; // 
stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -134,7 +134,7 @@ struct unpack_3d_functor { } }; -static void unpack_3d(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -153,7 +153,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -161,7 +161,7 @@ struct unpack_3d_permute1_1_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -189,7 +189,7 @@ struct unpack_3d_permute1_1_functor { } }; -static void unpack_3d_permute1_1(typename 
FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -206,7 +206,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -214,7 +214,7 @@ struct unpack_3d_permute1_2_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -243,7 +243,7 @@ struct unpack_3d_permute1_2_functor { } }; -static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -261,7 +261,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um 
d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -270,7 +270,7 @@ struct unpack_3d_permute1_n_functor { int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -299,7 +299,7 @@ struct unpack_3d_permute1_n_functor { } }; -static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -317,7 +317,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -325,7 +325,7 @@ struct unpack_3d_permute2_1_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename 
FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -352,7 +352,7 @@ struct unpack_3d_permute2_1_functor { } }; -static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -370,7 +370,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -378,7 +378,7 @@ struct unpack_3d_permute2_2_functor { int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -406,7 +406,7 @@ struct unpack_3d_permute2_2_functor { } }; -static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um 
d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -423,7 +423,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -432,7 +432,7 @@ struct unpack_3d_permute2_n_functor { int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -460,7 +460,7 @@ struct unpack_3d_permute2_n_functor { } }; -static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index ed7ace08c16..6e1b3a83fa9 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -48,7 +48,7 @@ using namespace MathSpecialKokkos; enum{REVERSE_RHO}; enum{FORWARD_IK,FORWARD_IK_PERATOM}; -#ifdef FFT_KOKKOS_SINGLE +#ifdef FFT_SINGLE #define ZEROF 0.0f #define ONEF 1.0f #else @@ -582,7 +582,7 @@ void PPPMKokkos::compute(int eflag, int vflag) if (atom->nmax > nmax) { nmax = 
atomKK->nmax; d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -595,8 +595,8 @@ void PPPMKokkos::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_KOKKOS_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); + gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); brick2fft(); // compute potential gradient on my FFT grid and @@ -609,14 +609,14 @@ void PPPMKokkos::compute(int eflag, int vflag) // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_KOKKOS_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) - gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_KOKKOS_SCALAR), - k_gc_buf1,k_gc_buf2,MPI_FFT_KOKKOS_SCALAR); + gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK_PERATOM,7,sizeof(FFT_SCALAR), + k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR); // calculate the force on my particles @@ -730,8 +730,8 @@ void PPPMKokkos::allocate() npergrid = 3; - k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); // tally local grid sizes // ngrid = count of owned+ghost grid cells on this proc @@ 
-753,7 +753,7 @@ void PPPMKokkos::allocate() // allocate distributed grid data - d_density_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view(); @@ -775,17 +775,17 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); - k_rho_coeff = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); + d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + k_rho_coeff = FFT_DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -810,7 +810,7 @@ void 
PPPMKokkos::allocate() remap = new RemapKokkos(lmp,world, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_KOKKOS_PRECISION,collective_flag,gpu_aware_flag); + 1,0,0,FFT_PRECISION,collective_flag,gpu_aware_flag); } /* ---------------------------------------------------------------------- @@ -847,14 +847,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename 
FFT_AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // use same GC ghost grid object for peratom grid communication @@ -862,8 +862,8 @@ void PPPMKokkos::allocate_peratom() npergrid = 7; - k_gc_buf1 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); - k_gc_buf2 = FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); + k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1); + k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2); } /* ---------------------------------------------------------------------- @@ -1234,14 +1234,14 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) const { // The density_brick array is atomic for Half/Thread neighbor style - Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; + Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; int nx = d_part2grid(i,0); int ny = d_part2grid(i,1); int nz = d_part2grid(i,2); - const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; @@ -1250,13 +1250,13 @@ void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) c compute_rho1d(i,dx,dy,dz); - const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; + const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; 
n <= nupper; n++) { const int mz = n+nz; - const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; - const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; a_density_brick(mz,my,mx) += x0*d_rho1d(i,l+order/2,0); @@ -1294,9 +1294,9 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team if ( ((nz+nlower-nzlo_out)*ix*iy >= ito) || ((nz+nupper-nzlo_out+1)*ix*iy < ifrom) ) continue; - const FFT_KOKKOS_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_KOKKOS_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_KOKKOS_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; + const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; + const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; nz -= nzlo_out; ny -= nylo_out; @@ -1304,15 +1304,15 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team compute_rho1d(i,dx,dy,dz); - const FFT_KOKKOS_SCALAR z0 = delvolinv * q[i]; + const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; const int in = mz*ix*iy; - const FFT_KOKKOS_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); + const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); for (int m = nlower; m <= nupper; m++) { const int my = m+ny; const int im = in+my*ix; - const FFT_KOKKOS_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); + const FFT_SCALAR x0 = y0*d_rho1d(i,m+order/2,1); for (int l = nlower; l <= nupper; l++) { const int mx = l+nx; const int il = im+mx; @@ -2040,8 +2040,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_ik, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_KOKKOS_SCALAR x0,y0,z0; - FFT_KOKKOS_SCALAR ekx,eky,ekz; + FFT_SCALAR x0,y0,z0; + 
FFT_SCALAR ekx,eky,ekz; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2100,8 +2100,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i) const { int l,m,n,nx,ny,nz,mx,my,mz; - FFT_KOKKOS_SCALAR dx,dy,dz,x0,y0,z0; - FFT_KOKKOS_SCALAR u,v0,v1,v2,v3,v4,v5; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; nx = d_part2grid(i,0); ny = d_part2grid(i,1); @@ -2155,7 +2155,7 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2211,7 +2211,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_forward2, const int &i) con ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2269,7 +2269,7 @@ void PPPMKokkos::operator()(TagPPPM_unpack_forward2, const int &i) c ------------------------------------------------------------------------- */ template -void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::pack_reverse_grid_kokkos(int /*flag*/, 
FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2299,7 +2299,7 @@ void PPPMKokkos::operator()(TagPPPM_pack_reverse, const int &i) cons ------------------------------------------------------------------------- */ template -void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos::unpack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2332,11 +2332,11 @@ void PPPMKokkos::operator()(TagPPPM_unpack_reverse, const int &i) co template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &dx, const FFT_KOKKOS_SCALAR &dy, - const FFT_KOKKOS_SCALAR &dz) const +void PPPMKokkos::compute_rho1d(const int i, const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) const { int k,l; - FFT_KOKKOS_SCALAR r1,r2,r3; + FFT_SCALAR r1,r2,r3; for (k = (1-order)/2; k <= order/2; k++) { r1 = r2 = r3 = ZEROF; @@ -2375,10 +2375,10 @@ template void PPPMKokkos::compute_rho_coeff() { int j,k,l,m; - FFT_KOKKOS_SCALAR s; - FFT_KOKKOS_SCALAR **a = new FFT_KOKKOS_SCALAR *[order]; + FFT_SCALAR s; + FFT_SCALAR **a = new FFT_SCALAR *[order]; for (int i = 0; i < order; ++i) - a[i] = new FFT_KOKKOS_SCALAR[2*order+1]; + a[i] = new FFT_SCALAR[2*order+1]; for (k = 0; k <= 2*order; k++) for (l = 0; l < order; l++) @@ -2390,7 +2390,7 @@ void PPPMKokkos::compute_rho_coeff() s = 0.0; for (l = 0; l < j; l++) { a[l+1][k+order] = (a[l][k+1+order]-a[l][k-1+order]) / (l+1); -#ifdef FFT_KOKKOS_SINGLE +#ifdef FFT_SINGLE s += powf(0.5,(float) l+1) * (a[l][k-1+order] + powf(-1.0,(float) l) * 
a[l][k+1+order]) / (l+1); #else @@ -2586,18 +2586,18 @@ double PPPMKokkos::memory_usage() double bytes = (double)nmax*3 * sizeof(double); int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1); - bytes += (double)4 * nbrick * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)4 * nbrick * sizeof(FFT_SCALAR); if (triclinic) bytes += (double)3 * nfft_both * sizeof(double); bytes += (double)6 * nfft_both * sizeof(double); bytes += (double)nfft_both * sizeof(double); - bytes += (double)nfft_both*5 * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)nfft_both*5 * sizeof(FFT_SCALAR); if (peratom_allocate_flag) - bytes += (double)6 * nbrick * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)6 * nbrick * sizeof(FFT_SCALAR); // two Grid3d bufs - bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_KOKKOS_SCALAR); + bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR); return bytes; } diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 09513c9a2fb..14d4670dbdf 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -350,7 +350,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um d_buf; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf; int unpack_offset; DAT::tdual_int_scalar k_flag; @@ -364,31 +364,31 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_density_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_u_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_density_brick; + typename FFT_AT::t_FFT_SCALAR_3d 
d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_u_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; typename AT::t_float_1d d_fky; typename AT::t_float_1d d_fkz; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_density_fft; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work1; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_work2; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_density_fft; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work1; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_work2; + FFT_DAT::tdual_FFT_SCALAR_1d k_density_fft; + FFT_DAT::tdual_FFT_SCALAR_1d k_work1; + FFT_DAT::tdual_FFT_SCALAR_1d k_work2; + typename FFT_AT::t_FFT_SCALAR_1d d_density_fft; + typename FFT_AT::t_FFT_SCALAR_1d d_work1; + typename FFT_AT::t_FFT_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; - //FFT_KOKKOS_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d_3 d_rho1d; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_2d k_rho_coeff; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_2d d_rho_coeff; - FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_2d h_rho_coeff; + //FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; + typename FFT_AT::t_FFT_SCALAR_2d_3 d_rho1d; + FFT_DAT::tdual_FFT_SCALAR_2d k_rho_coeff; + typename FFT_AT::t_FFT_SCALAR_2d d_rho_coeff; + FFT_HAT::t_FFT_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; @@ -398,7 +398,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { RemapKokkos *remap; Grid3dKokkos *gc; - FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d k_gc_buf1,k_gc_buf2; + FFT_DAT::tdual_FFT_SCALAR_1d k_gc_buf1,k_gc_buf2; int ngc_buf1,ngc_buf2,npergrid; //int **part2grid; // storage for particle -> grid mapping @@ -429,17 +429,17 @@ class PPPMKokkos : public PPPM, public 
KokkosBaseFFT { void fieldforce_peratom() override; KOKKOS_INLINE_FUNCTION - void compute_rho1d(const int i, const FFT_KOKKOS_SCALAR &, const FFT_KOKKOS_SCALAR &, - const FFT_KOKKOS_SCALAR &) const; + void compute_rho1d(const int i, const FFT_SCALAR &, const FFT_SCALAR &, + const FFT_SCALAR &) const; void compute_rho_coeff(); void slabcorr() override; // grid communication - void pack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_forward_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; - void pack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; - void unpack_reverse_grid_kokkos(int, FFT_KOKKOS_DAT::tdual_FFT_KOKKOS_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; + void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) override; + void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) override; // triclinic diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index 18ba6264605..efc6742a259 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -59,7 +59,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf) { 
remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -103,7 +103,7 @@ void RemapKokkos::perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_ ------------------------------------------------------------------------- */ template -void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_in, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_out, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -111,7 +111,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCAL // use point-to-point communication int i,isend,irecv; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; + typename FFT_AT::t_FFT_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -120,20 +120,20 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCAL // post all recvs into scratch space - FFT_KOKKOS_SCALAR* v_scratch = d_scratch.data(); + FFT_SCALAR* v_scratch = d_scratch.data(); if (!plan->usecuda_aware) { plan->h_scratch = Kokkos::create_mirror_view(d_scratch); v_scratch = plan->h_scratch.data(); } for (irecv = 0; irecv < plan->nrecv; irecv++) { - FFT_KOKKOS_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; + FFT_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; MPI_Irecv(scratch,plan->recv_size[irecv], - MPI_FFT_KOKKOS_SCALAR,plan->recv_proc[irecv],0, + MPI_FFT_SCALAR,plan->recv_proc[irecv],0, plan->comm,&plan->request[irecv]); } - FFT_KOKKOS_SCALAR* v_sendbuf = plan->d_sendbuf.data(); + FFT_SCALAR* v_sendbuf = plan->d_sendbuf.data(); if (!plan->usecuda_aware) { plan->h_sendbuf = Kokkos::create_mirror_view(plan->d_sendbuf); v_sendbuf = plan->h_sendbuf.data(); @@ -149,7 +149,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCAL if (!plan->usecuda_aware) 
Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); - MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_KOKKOS_SCALAR, + MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, plan->send_proc[isend],0,plan->comm); } @@ -465,7 +465,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - plan->d_sendbuf = typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return nullptr; } @@ -475,7 +475,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return nullptr; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index ad5fa9833d2..a62c14f00b9 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -28,13 +28,13 @@ template struct remap_plan_3d_kokkos { typedef DeviceType device_type; typedef FFTArrayTypes FFT_AT; - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_sendbuf; // buffer for MPI sends - FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_sendbuf; // host buffer for MPI sends - typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d d_scratch; // scratch buffer for MPI recvs - FFT_KOKKOS_HAT::t_FFT_KOKKOS_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs - void (*pack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + typename FFT_AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends + FFT_HAT::t_FFT_SCALAR_1d h_sendbuf; // host buffer for MPI sends + typename FFT_AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + FFT_HAT::t_FFT_SCALAR_1d h_scratch; // host scratch buffer for MPI recvs + void (*pack)(typename 
FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - void (*unpack)(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -66,11 +66,11 @@ class RemapKokkos : protected Pointers { RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos() override; - void perform(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d); + void perform(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, typename FFT_AT::t_FFT_KOKKOS_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From 6d1d515f3a3f7369f9ace5bef4dfcc1b81d6f80e Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 13 Dec 2023 15:32:32 -0700 Subject: [PATCH 07/31] Fix compile issue --- src/KOKKOS/pppm_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 6e1b3a83fa9..2a53682df3a 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -285,7 +285,7 @@ void PPPMKokkos::init() estimated_accuracy); mesg += fmt::format(" estimated 
relative force accuracy = {:.8g}\n", estimated_accuracy/two_charge_force); - mesg += " using " LMP_FFT_KOKKOS_PREC " precision " LMP_FFT_KOKKOS_LIB "\n"; + mesg += " using " LMP_FFT_PREC " precision " LMP_FFT_KOKKOS_LIB "\n"; mesg += fmt::format(" 3d grid and FFT values/proc = {} {}\n", ngrid_max,nfft_both_max); utils::logmesg(lmp,mesg); From c45183d45c98c151ae4227dd415859249afae766 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Thu, 14 Dec 2023 15:38:45 -0500 Subject: [PATCH 08/31] Updated CMake build system to allow for FFT_KOKKOS parameter. Updated CMakeLists.txt to print the correct value when summarizing. --- cmake/CMakeLists.txt | 16 +--------------- cmake/Modules/Packages/KOKKOS.cmake | 23 +++++++++++++++++++---- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 28e02bbee7b..aacaca4e6ca 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -984,21 +984,7 @@ if(PKG_KSPACE) message(STATUS "Using non-threaded FFTs") endif() if(PKG_KOKKOS) - if(Kokkos_ENABLE_CUDA) - if(FFT STREQUAL "KISS") - message(STATUS "Kokkos FFT: KISS") - else() - message(STATUS "Kokkos FFT: cuFFT") - endif() - elseif(Kokkos_ENABLE_HIP) - if(FFT STREQUAL "KISS") - message(STATUS "Kokkos FFT: KISS") - else() - message(STATUS "Kokkos FFT: hipFFT") - endif() - else() - message(STATUS "Kokkos FFT: ${FFT}") - endif() + message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") endif() endif() if(BUILD_DOC) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 0edd9a3baad..eb20f93956e 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -131,16 +131,31 @@ if(PKG_KSPACE) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) + set(FFT_KOKKOS_VALUES KISS FFTW3 MKL HIPFFT CUFFT) + set_property(CACHE FFT_KOKKOS PROPERTY STRINGS ${FFT_KOKKOS_VALUES}) + 
validate_option(FFT_KOKKOS FFT_KOKKOS_VALUES) + string(TOUPPER ${FFT_KOKKOS} FFT_KOKKOS) + if(Kokkos_ENABLE_CUDA) - if(NOT (FFT STREQUAL "KISS")) - target_compile_definitions(lammps PRIVATE -DFFT_CUFFT) + if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "CUFFT"))) + message(FATAL_ERROR "The CUDA backend of Kokkos requires either KISS FFT or CUFFT.") + elseif(FFT_KOKKOS STREQUAL "KISS") + message(WARNING "Using KISS FFT with the CUDA backend of Kokkos may be sub-optimal.") + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS) + elseif(FFT_KOKKOS STREQUAL "CUFFT") + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_CUFFT) target_link_libraries(lammps PRIVATE cufft) endif() elseif(Kokkos_ENABLE_HIP) - if(NOT (FFT STREQUAL "KISS")) + if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "HIPFFT"))) + message(FATAL_ERROR "The HIP backend of Kokkos requires either KISS FFT or HIPFFT.") + elseif(FFT_KOKKOS STREQUAL "KISS") + message(WARNING "Using KISS FFT with the HIP backend of Kokkos may be sub-optimal.") + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS) + elseif(FFT_KOKKOS STREQUAL "HIPFFT") include(DetectHIPInstallation) find_package(hipfft REQUIRED) - target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT) + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_HIPFFT) target_link_libraries(lammps PRIVATE hip::hipfft) endif() endif() From a6addbc90761fd9b3e48e5a3064f8f589a58140e Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 11:30:56 -0500 Subject: [PATCH 09/31] Updated documentation for FFT_KOKKOS_ flags and CMake variable selection --- doc/src/Build_settings.rst | 18 +++++++++++++++--- doc/src/Howto_cmake.rst | 2 ++ doc/src/kspace_style.rst | 5 ++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index 7576cae3eb2..33b0508fe90 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -51,14 +51,18 @@ LAMMPS 
can use them if they are available on your system. .. code-block:: bash -D FFT=value # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS + -D FFT_KOKKOS=value # FFTW3 or MKL or KISS or CUFFT or HIPFFT, default is KISS -D FFT_SINGLE=value # yes or no (default), no = double precision -D FFT_PACK=value # array (default) or pointer or memcpy .. note:: - The values for the FFT variable must be in upper-case. This is - an exception to the rule that all CMake variables can be specified - with lower-case values. + When the Kokkos variant of a package is compiled and selected at run time, + the FFT library selected by the FFT_KOKKOS variable applies. Otherwise, + the FFT library selected by the FFT variable applies. + The same FFT settings apply to both. FFT_KOKKOS must be compatible with the + Kokkos backend - for example, when using the CUDA backend of Kokkos, + you must use either CUFFT or KISS. Usually these settings are all that is needed. If FFTW3 is selected, then CMake will try to detect, if threaded FFTW @@ -87,6 +91,8 @@ LAMMPS can use them if they are available on your system. FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS # default is KISS if not specified + FFT_INC = -DFFT_KOKKOS_CUFFT # -DFFT_KOKKOS_{FFTW,FFTW3,MKL,CUFFT,HIPFFT,KISS} + # default is KISS if not specified FFT_INC = -DFFT_SINGLE # do not specify for double precision FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries FFT_INC = -DFFT_MKL_THREADS # enable using threaded FFTs with MKL libraries @@ -97,6 +103,8 @@ LAMMPS can use them if they are available on your system. 
FFT_INC = -I/usr/local/include FFT_PATH = -L/usr/local/lib + FFT_LIB = -lhipfft # hipFFT either precision + FFT_LIB = -lcufft # cuFFT either precision FFT_LIB = -lfftw3 # FFTW3 double precision FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS) FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision @@ -141,6 +149,10 @@ The Intel MKL math library is part of the Intel compiler suite. It can be used with the Intel or GNU compiler (see the ``FFT_LIB`` setting above). +The CUFFT and HIPFFT FFT libraries are packaged with NVIDIA's CUDA and AMD's +HIP installations, respectively. These FFT libraries require the Kokkos acceleration +package to be enabled and the Kokkos backend to be GPU-resident (i.e., HIP or CUDA). + Performing 3d FFTs in parallel can be time-consuming due to data access and required communication. This cost can be reduced by performing single-precision FFTs instead of double precision. Single precision diff --git a/doc/src/Howto_cmake.rst b/doc/src/Howto_cmake.rst index 42324cf2f10..8b710d10650 100644 --- a/doc/src/Howto_cmake.rst +++ b/doc/src/Howto_cmake.rst @@ -349,6 +349,8 @@ Some common LAMMPS specific variables - when set to ``name`` the LAMMPS executable and library will be called ``lmp_name`` and ``liblammps_name.a`` * - ``FFT`` - select which FFT library to use: ``FFTW3``, ``MKL``, ``KISS`` (default, unless FFTW3 is found) + * - ``FFT_KOKKOS`` + - select which FFT library to use in Kokkos-enabled styles: ``FFTW3``, ``MKL``, ``HIPFFT``, ``CUFFT``, ``KISS`` (default) * - ``FFT_SINGLE`` - select whether to use single precision FFTs (default: ``off``) * - ``WITH_JPEG`` diff --git a/doc/src/kspace_style.rst b/doc/src/kspace_style.rst index 38a6fce375a..78d7380c01f 100644 --- a/doc/src/kspace_style.rst +++ b/doc/src/kspace_style.rst @@ -450,7 +450,10 @@ relative RMS error.
For the KOKKOS package, the *pppm/kk* style performs charge assignment and force interpolation calculations, along with the FFTs themselves, on the GPU or (optionally) threaded on the CPU when - using OpenMP and FFTW3. + using OpenMP and FFTW3. The specific FFT library is selected using + the FFT_KOKKOS CMake parameter. See the + :doc:`Build settings <Build_settings>` doc page for how to select a + 3rd-party FFT library. ---------- From bc47f4f3a32c8499d5bd5fd6bc4a68424b700da5 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 11:56:23 -0500 Subject: [PATCH 10/31] Updated CMake preset files for kokkos-cuda and kokkos-hip --- cmake/presets/kokkos-cuda.cmake | 3 +++ cmake/presets/kokkos-hip.cmake | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cmake/presets/kokkos-cuda.cmake b/cmake/presets/kokkos-cuda.cmake index c3ee081898f..3205387044e 100644 --- a/cmake/presets/kokkos-cuda.cmake +++ b/cmake/presets/kokkos-cuda.cmake @@ -9,5 +9,8 @@ set(BUILD_OMP ON CACHE BOOL "" FORCE) get_filename_component(NVCC_WRAPPER_CMD ${CMAKE_CURRENT_SOURCE_DIR}/../lib/kokkos/bin/nvcc_wrapper ABSOLUTE) set(CMAKE_CXX_COMPILER ${NVCC_WRAPPER_CMD} CACHE FILEPATH "" FORCE) +# If KSPACE is also enabled, use CUFFT for FFTs +set(FFT_KOKKOS "CUFFT" CACHE STRING "" FORCE) + # hide deprecation warnings temporarily for stable release set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE) diff --git a/cmake/presets/kokkos-hip.cmake b/cmake/presets/kokkos-hip.cmake index 827a37152b3..ffc259a2252 100644 --- a/cmake/presets/kokkos-hip.cmake +++ b/cmake/presets/kokkos-hip.cmake @@ -12,6 +12,9 @@ set(BUILD_OMP ON CACHE BOOL "" FORCE) set(CMAKE_CXX_COMPILER hipcc CACHE STRING "" FORCE) set(CMAKE_TUNE_FLAGS "-munsafe-fp-atomics" CACHE STRING "" FORCE) +# If KSPACE is also enabled, use HIPFFT for FFTs +set(FFT_KOKKOS "HIPFFT" CACHE STRING "" FORCE) + # hide deprecation warnings temporarily for stable release set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE) From
d02ffb0e709cb57ff0959c74d74a7a0ad9b7670e Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 12:06:41 -0500 Subject: [PATCH 11/31] Updated Summit & Frontier template Makefiles --- src/MAKE/MACHINES/Makefile.frontier_kokkos | 2 +- src/MAKE/MACHINES/Makefile.summit_kokkos | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MAKE/MACHINES/Makefile.frontier_kokkos b/src/MAKE/MACHINES/Makefile.frontier_kokkos index 86cddd12b77..b58a3d871c2 100644 --- a/src/MAKE/MACHINES/Makefile.frontier_kokkos +++ b/src/MAKE/MACHINES/Makefile.frontier_kokkos @@ -55,7 +55,7 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa MY_HIP_EXE = $(shell which hipcc) MY_HIP_PATH = $(dir ${MY_HIP_EXE}) -FFT_INC = -DFFT_HIPFFT +FFT_INC = -DFFT_KOKKOS_HIPFFT FFT_PATH = FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft diff --git a/src/MAKE/MACHINES/Makefile.summit_kokkos b/src/MAKE/MACHINES/Makefile.summit_kokkos index 557ebd22b2a..d554e09a5ad 100644 --- a/src/MAKE/MACHINES/Makefile.summit_kokkos +++ b/src/MAKE/MACHINES/Makefile.summit_kokkos @@ -57,7 +57,7 @@ MPI_LIB = -L${MY_MPI_PATH}../lib -lmpi_ibm # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_CUFFT +FFT_INC = -DFFT_KOKKOS_CUFFT FFT_PATH = FFT_LIB = -lcufft From bc7050ab5001b4480383d9e16995494a25f1bec8 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 12:11:31 -0500 Subject: [PATCH 12/31] Added LMP_HEFFTE to CMakeLists.txt to attempt to fix a merge conflict --- cmake/CMakeLists.txt | 46 +++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index aacaca4e6ca..76248445e91 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -971,20 +971,40 @@ if(PKG_KOKKOS) endif() endif() if(PKG_KSPACE) - message(STATUS "<<< FFT settings >>> --- Primary FFT lib: ${FFT}") - if(FFT_SINGLE) - message(STATUS "Using single precision FFTs") - else() - 
message(STATUS "Using double precision FFTs") - endif() - if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) - message(STATUS "Using threaded FFTs") + if (LMP_HEFFTE) + message(STATUS "<<< FFT settings >>> +-- Primary FFT lib: heFFTe") + if (HEFFTE_BACKEND) + message(STATUS "heFFTe backend: ${HEFFTE_BACKEND}") + else() + message(STATUS "heFFTe backend: stock (builtin FFT implementation, tested for corrected but not optimized for production)") + endif() + if(FFT_SINGLE) + message(STATUS "Using single precision FFTs") + else() + message(STATUS "Using double precision FFTs") + endif() else() - message(STATUS "Using non-threaded FFTs") - endif() - if(PKG_KOKKOS) - message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") + message(STATUS "<<< FFT settings >>> +-- Primary FFT lib: ${FFT}") + if(FFT_SINGLE) + message(STATUS "Using single precision FFTs") + else() + message(STATUS "Using double precision FFTs") + endif() + if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) + message(STATUS "Using threaded FFTs") + else() + message(STATUS "Using non-threaded FFTs") + endif() + if (FFT_HEFFTE) + message(STATUS "Using distributed algorithms from heFTTe") + else() + message(STATUS "Using builtin distributed algorithms") + endif() + if(PKG_KOKKOS) + message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") + endif() endif() endif() if(BUILD_DOC) From dd1ac640aeec2686b2757d734546d6960804bcc2 Mon Sep 17 00:00:00 2001 From: Nick Hagerty Date: Mon, 18 Dec 2023 12:56:30 -0500 Subject: [PATCH 13/31] Added declaration for FFT_KOKKOS variable --- cmake/Modules/Packages/KOKKOS.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index eb20f93956e..a0b872ba858 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -131,6 +131,7 @@ if(PKG_KSPACE) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) + set(FFT_KOKKOS 
"KISS" CACHE STRING "FFT library for Kokkos-enabled KSPACE package") set(FFT_KOKKOS_VALUES KISS FFTW3 MKL HIPFFT CUFFT) set_property(CACHE FFT_KOKKOS PROPERTY STRINGS ${FFT_KOKKOS_VALUES}) validate_option(FFT_KOKKOS FFT_KOKKOS_VALUES) From 10c09225886fe415d10a3b1ce0e7a69e1f75920b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 5 Jan 2024 09:18:02 -0700 Subject: [PATCH 14/31] Add author attribution --- src/KOKKOS/fft3d_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index d78239606e1..c50af939a4d 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Stan Moore (SNL), Sam Mish (U.C. Davis) + Contributing authors: Stan Moore (SNL), Sam Mish (U.C. Davis), Nick Hagerty (ORNL) ------------------------------------------------------------------------- */ #include "fft3d_kokkos.h" From fc2d78fc0a7834490f4ee53dc4e14a24006dc0ef Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 5 Jan 2024 09:25:32 -0700 Subject: [PATCH 15/31] Update Makefiles --- src/MAKE/MACHINES/Makefile.perlmutter_kokkos | 2 +- src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos index 43162b88ddc..cdf2daa4719 100644 --- a/src/MAKE/MACHINES/Makefile.perlmutter_kokkos +++ b/src/MAKE/MACHINES/Makefile.perlmutter_kokkos @@ -54,7 +54,7 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_cud # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_CUFFT +FFT_INC = -DFFT_KOKKOS_CUFFT FFT_PATH = FFT_LIB = ${CRAY_CUDATOOLKIT_POST_LINK_OPTS} -lcufft diff --git a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi 
b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi index e78be1acdcf..88c07fc2c51 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi +++ b/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi @@ -57,7 +57,7 @@ MPI_LIB = # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_CUFFT +FFT_INC = -DFFT_KOKKOS_CUFFT FFT_PATH = FFT_LIB = -lcufft From e319555f64cb9e2c47f6fc8b19c84e5ba9a8b6e1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 31 Jan 2024 12:54:03 -0500 Subject: [PATCH 16/31] merge lmpfftsettings_kokkos.h into lmpfftsettings.h --- src/KOKKOS/fftdata_kokkos.h | 21 +++++---------- src/KOKKOS/lmpfftsettings_kokkos.h | 43 ------------------------------ src/lmpfftsettings.h | 24 ++++++++++++++++- 3 files changed, 29 insertions(+), 59 deletions(-) delete mode 100644 src/KOKKOS/lmpfftsettings_kokkos.h diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 15dca33bcc1..7f900002e5d 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -12,29 +12,20 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#include "kokkos_type.h" - -#ifndef MAX -#define MAX(A,B) ((A) > (B) ? (A) : (B)) -#endif - // data types for 2d/3d FFTs #ifndef LMP_FFT_DATA_KOKKOS_H #define LMP_FFT_DATA_KOKKOS_H -#include "lmpfftsettings_kokkos.h" - -// ------------------------------------------------------------------------- - -// Data types for single-precision complex +#include "kokkos_type.h" -#if FFT_PRECISION == 1 -#elif FFT_PRECISION == 2 -#else -#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)" +#ifndef MAX +#define MAX(A,B) ((A) > (B) ? 
(A) : (B)) #endif +#include "lmpfftsettings.h" + +// ------------------------------------------------------------------------- // with KOKKOS in CUDA or HIP mode we can only have // CUFFT/HIPFFT or KISSFFT, thus undefine all other diff --git a/src/KOKKOS/lmpfftsettings_kokkos.h b/src/KOKKOS/lmpfftsettings_kokkos.h deleted file mode 100644 index 6cea9bb63a4..00000000000 --- a/src/KOKKOS/lmpfftsettings_kokkos.h +++ /dev/null @@ -1,43 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -// common FFT library related defines and compilation settings - -#ifndef LMP_FFT_KOKKOS_SETTINGS_H -#define LMP_FFT_KOKKOS_SETTINGS_H - -#include "lmpfftsettings.h" - -// if user set FFTW, it means FFTW3 - -#ifdef FFT_KOKKOS_FFTW -#ifndef FFT_KOKKOS_FFTW3 -#define FFT_KOKKOS_FFTW3 -#endif -#endif - -// set strings for library info output - -#if defined(FFT_KOKKOS_FFTW3) -#define LMP_FFT_KOKKOS_LIB "FFTW3" -#elif defined(FFT_KOKKOS_MKL) -#define LMP_FFT_KOKKOS_LIB "MKL FFT" -#elif defined(FFT_KOKKOS_CUFFT) -#define LMP_FFT_KOKKOS_LIB "cuFFT" -#elif defined(FFT_KOKKOS_HIPFFT) -#define LMP_FFT_KOKKOS_LIB "hipFFT" -#else -#define LMP_FFT_KOKKOS_LIB "KISS FFT" -#endif - -#endif diff --git a/src/lmpfftsettings.h b/src/lmpfftsettings.h index 7fad0de8c78..426726440ef 100644 --- a/src/lmpfftsettings.h +++ b/src/lmpfftsettings.h @@ -16,7 +16,7 @@ #ifndef LMP_FFT_SETTINGS_H #define LMP_FFT_SETTINGS_H -// if user set 
FFTW, it means FFTW3 +// if a user sets FFTW, it means FFTW3 #ifdef FFT_FFTW #ifndef FFT_FFTW3 @@ -24,6 +24,14 @@ #endif #endif +#ifdef LMP_KOKKOS +#ifdef FFT_KOKKOS_FFTW +#ifndef FFT_KOKKOS_FFTW3 +#define FFT_KOKKOS_FFTW3 +#endif +#endif +#endif + // set strings for library info output #if defined(FFT_FFTW3) @@ -38,6 +46,20 @@ #define LMP_FFT_LIB "KISS FFT" #endif +#ifdef LMP_KOKKOS +#if defined(FFT_KOKKOS_FFTW3) +#define LMP_FFT_KOKKOS_LIB "FFTW3" +#elif defined(FFT_KOKKOS_MKL) +#define LMP_FFT_KOKKOS_LIB "MKL FFT" +#elif defined(FFT_KOKKOS_CUFFT) +#define LMP_FFT_KOKKOS_LIB "cuFFT" +#elif defined(FFT_KOKKOS_HIPFFT) +#define LMP_FFT_KOKKOS_LIB "hipFFT" +#else +#define LMP_FFT_KOKKOS_LIB "KISS FFT" +#endif +#endif + #ifdef FFT_SINGLE typedef float FFT_SCALAR; #define FFT_PRECISION 1 From de7c459b2f18e8aac01497129316a9e3ba6059c4 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 31 Jan 2024 13:28:41 -0500 Subject: [PATCH 17/31] consolidate FFT settings in headers --- src/AMOEBA/amoeba_convolution.h | 12 +----------- src/INTEL/pppm_electrode_intel.cpp | 29 +++++++++++++++-------------- src/INTEL/pppm_electrode_intel.h | 3 --- src/KSPACE/msm.cpp | 4 ---- src/kspace.h | 8 -------- src/lmpfftsettings.h | 8 ++++++++ 6 files changed, 24 insertions(+), 40 deletions(-) diff --git a/src/AMOEBA/amoeba_convolution.h b/src/AMOEBA/amoeba_convolution.h index bed65149ecb..60825bb8b62 100644 --- a/src/AMOEBA/amoeba_convolution.h +++ b/src/AMOEBA/amoeba_convolution.h @@ -15,17 +15,7 @@ #define LMP_AMOEBA_CONVOLUTION_H #include "pointers.h" - -#ifdef FFT_SINGLE -typedef float FFT_SCALAR; -#define LMP_FFT_PREC "single" -#define MPI_FFT_SCALAR MPI_FLOAT -#else - -typedef double FFT_SCALAR; -#define LMP_FFT_PREC "double" -#define MPI_FFT_SCALAR MPI_DOUBLE -#endif +#include "lmpfftsettings.h" namespace LAMMPS_NS { diff --git a/src/INTEL/pppm_electrode_intel.cpp b/src/INTEL/pppm_electrode_intel.cpp index 9f3c57b50e3..11cda407489 100644 --- a/src/INTEL/pppm_electrode_intel.cpp +++ 
b/src/INTEL/pppm_electrode_intel.cpp @@ -42,6 +42,7 @@ #include "update.h" #include "wire_dipole.h" +#include #include #include @@ -164,7 +165,6 @@ void PPPMElectrodeIntel::setup() PPPMIntel::setup(); prd[0] /= wire_volfactor; prd[1] /= wire_volfactor; - } void PPPMElectrodeIntel::compute(int eflag, int vflag) @@ -280,7 +280,7 @@ void PPPMElectrodeIntel::compute(int eflag, int vflag) slabflag = 0; // bypass compute_second's slabcorr() PPPMIntel::compute_second(eflag, vflag); slabflag = tempslabflag; - boundcorr->compute_corr(qsum, eflag_atom, eflag_global, energy, eatom); + boundcorr->compute_corr(qsum, eflag_atom, eflag_global, energy, eatom); compute_vector_called = false; } @@ -333,7 +333,7 @@ void PPPMElectrodeIntel::compute_vector(double *vec, int sensor_grpbit, int sour // electrolyte density (without writing an additional function) FFT_SCALAR ***density_brick_real = density_brick; FFT_SCALAR *density_fft_real = density_fft; - if (neighbor->ago != 0) pack_buffers(); // since midstep positions may be outdated + if (neighbor->ago != 0) pack_buffers(); // since midstep positions may be outdated switch (fix->precision()) { case FixIntel::PREC_MODE_MIXED: make_rho_in_brick(fix->get_mixed_buffers(), source_grpbit, @@ -1202,22 +1202,23 @@ void PPPMElectrodeIntel::pack_buffers_q() { fix->start_watch(TIME_PACK); int packthreads; - if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads; - else packthreads = 1; - #if defined(_OPENMP) - #pragma omp parallel if (packthreads > 1) - #endif + if (comm->nthreads > INTEL_HTHREADS) + packthreads = comm->nthreads; + else + packthreads = 1; +#if defined(_OPENMP) +#pragma omp parallel if (packthreads > 1) +#endif { int ifrom, ito, tid; - IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost, - packthreads, - sizeof(IntelBuffers::atom_t)); + IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, packthreads, + sizeof(IntelBuffers::atom_t)); if (fix->precision() == 
FixIntel::PREC_MODE_MIXED) - fix->get_mixed_buffers()->thr_pack_q(ifrom,ito); + fix->get_mixed_buffers()->thr_pack_q(ifrom, ito); else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) - fix->get_double_buffers()->thr_pack_q(ifrom,ito); + fix->get_double_buffers()->thr_pack_q(ifrom, ito); else - fix->get_single_buffers()->thr_pack_q(ifrom,ito); + fix->get_single_buffers()->thr_pack_q(ifrom, ito); } fix->stop_watch(TIME_PACK); } diff --git a/src/INTEL/pppm_electrode_intel.h b/src/INTEL/pppm_electrode_intel.h index bfe325d9a1d..e61641ea19f 100644 --- a/src/INTEL/pppm_electrode_intel.h +++ b/src/INTEL/pppm_electrode_intel.h @@ -29,12 +29,9 @@ KSpaceStyle(pppm/electrode/intel,PPPMElectrodeIntel) #ifndef LMP_PPPM_ELECTRODE_INTEL_H #define LMP_PPPM_ELECTRODE_INTEL_H -#include "boundary_correction.h" #include "electrode_kspace.h" #include "fix_intel.h" -#include "pppm.h" #include "pppm_intel.h" -#include namespace LAMMPS_NS { diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp index 8f79ab408c4..348135494e4 100644 --- a/src/KSPACE/msm.cpp +++ b/src/KSPACE/msm.cpp @@ -140,10 +140,6 @@ void MSM::init() if ((order < 4) || (order > 10) || (order%2 != 0)) error->all(FLERR,"MSM order must be 4, 6, 8, or 10"); - if (sizeof(FFT_SCALAR) != 8) - error->all(FLERR,"Cannot (yet) use single precision with MSM " - "(remove -DFFT_SINGLE from Makefile and re-compile)"); - // compute two charge force two_charge(); diff --git a/src/kspace.h b/src/kspace.h index 61ab15c1d9c..cc7d979d431 100644 --- a/src/kspace.h +++ b/src/kspace.h @@ -16,14 +16,6 @@ #include "pointers.h" // IWYU pragma: export -#ifdef FFT_SINGLE -typedef float FFT_SCALAR; -#define MPI_FFT_SCALAR MPI_FLOAT -#else -typedef double FFT_SCALAR; -#define MPI_FFT_SCALAR MPI_DOUBLE -#endif - namespace LAMMPS_NS { class KSpace : protected Pointers { diff --git a/src/lmpfftsettings.h b/src/lmpfftsettings.h index 426726440ef..33628661840 100644 --- a/src/lmpfftsettings.h +++ b/src/lmpfftsettings.h @@ -42,6 +42,14 @@ #define 
LMP_FFT_LIB "cuFFT" #elif defined(FFT_HIPFFT) #define LMP_FFT_LIB "hipFFT" +#elif defined(FFT_HEFFT) +#if defined(FFT_HEFFTE_FFTW) +#define LMP_FFT_LIB "HeFFTe (FFTW3)" +#elif defined(FFT_HEFFTE_MKL) +#define LMP_FFT_LIB "HeFFTe (MKL)" +#else +#define LMP_FFT_LIB "HeFFTe (native)" +#endif #else #define LMP_FFT_LIB "KISS FFT" #endif From 677c8258e22d943c64265b46c863b5ceb683303b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 31 Jan 2024 16:19:55 -0500 Subject: [PATCH 18/31] fix cmake issues: always build static heFFTe libs, fixup git hash, cmake output --- cmake/CMakeLists.txt | 19 ++++++++----------- cmake/Modules/Packages/KSPACE.cmake | 20 ++++++++++++++++---- src/lmpfftsettings.h | 18 +++++++++--------- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 76248445e91..cb029d4d207 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -971,14 +971,15 @@ if(PKG_KOKKOS) endif() endif() if(PKG_KSPACE) - if (LMP_HEFFTE) + if (FFT_USE_HEFFTE) message(STATUS "<<< FFT settings >>> -- Primary FFT lib: heFFTe") - if (HEFFTE_BACKEND) - message(STATUS "heFFTe backend: ${HEFFTE_BACKEND}") + if (FFT_HEFFTE_BACKEND) + message(STATUS "heFFTe backend: ${FFT_HEFFTE_BACKEND}") else() message(STATUS "heFFTe backend: stock (builtin FFT implementation, tested for corrected but not optimized for production)") endif() + message(STATUS "Using distributed FFT algorithms from heFTTe") if(FFT_SINGLE) message(STATUS "Using single precision FFTs") else() @@ -997,14 +998,10 @@ if(PKG_KSPACE) else() message(STATUS "Using non-threaded FFTs") endif() - if (FFT_HEFFTE) - message(STATUS "Using distributed algorithms from heFTTe") - else() - message(STATUS "Using builtin distributed algorithms") - endif() - if(PKG_KOKKOS) - message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") - endif() + message(STATUS "Using builtin distributed FFT algorithms") + endif() + if(PKG_KOKKOS) + message(STATUS "Kokkos FFT: ${FFT_KOKKOS}") endif() 
endif() if(BUILD_DOC) diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index 9c9c879cd4a..2ce136aefba 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -50,7 +50,7 @@ option(FFT_USE_HEFFTE "Use heFFTe as the distributed FFT engine, overrides the if(FFT_USE_HEFFTE) # if FFT_HEFFTE is enabled, switch the builtin FFT engine with Heffte set(FFT_HEFFTE_BACKEND_VALUES FFTW MKL) - set(FFT_HEFFTE_BACKEND "" CACHE STRING "Select heFFTe backend, e.g., FFTW or MKL") + set(FFT_HEFFTE_BACKEND "${FFT}" CACHE STRING "Select heFFTe backend, e.g., FFTW or MKL") set_property(CACHE FFT_HEFFTE_BACKEND PROPERTY STRINGS ${FFT_HEFFTE_BACKEND_VALUES}) if(FFT_HEFFTE_BACKEND STREQUAL "FFTW") # respect the backend choice, FFTW or MKL @@ -60,24 +60,36 @@ if(FFT_USE_HEFFTE) set(HEFFTE_COMPONENTS "MKL") set(Heffte_ENABLE_MKL "ON" CACHE BOOL "Enables MKL backend for heFFTe") else() + set(HEFFTE_COMPONENTS "BUILTIN") message(WARNING "FFT_HEFFTE_BACKEND not selected, defaulting to the builtin 'stock' backend, which is intended for testing and is not optimized for production runs") endif() find_package(Heffte 2.4.0 QUIET COMPONENTS ${HEFFTE_COMPONENTS}) if (NOT Heffte_FOUND) # download and build + if(BUILD_SHARED_LIBS) + set(BUILD_SHARED_LIBS_WAS_ON YES) + set(BUILD_SHARED_LIBS OFF) + endif() + if(CMAKE_REQUEST_PIC) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + endif() include(FetchContent) FetchContent_Declare(HEFFTE_PROJECT # using v2.4.0 URL "https://github.com/icl-utk-edu/heffte/archive/refs/tags/v2.4.0.tar.gz" URL_HASH SHA256=02310fb4f9688df02f7181667e61c3adb7e38baf79611d80919d47452ff7881d ) FetchContent_Populate(HEFFTE_PROJECT) + # fixup git hash to show "(unknown)" to avoid compilation failures. It cannot derive the HeFFTe hash anyway. 
+ file(READ ${heffte_project_SOURCE_DIR}/include/heffte_config.cmake.h HEFFTE_CFG_FILE_TEXT) + string(REPLACE "@Heffte_GIT_HASH@" "(unknown)" HEFFTE_CFG_FILE_TEXT "${HEFFTE_CFG_FILE_TEXT}") + file(WRITE ${heffte_project_SOURCE_DIR}/include/heffte_config.cmake.h "${HEFFTE_CFG_FILE_TEXT}") add_subdirectory(${heffte_project_SOURCE_DIR} ${heffte_project_BINARY_DIR}) - set_target_properties(lmp PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") - set_target_properties(lammps PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") add_library(Heffte::Heffte INTERFACE IMPORTED GLOBAL) target_link_libraries(Heffte::Heffte INTERFACE Heffte) + if(BUILD_SHARED_LIBS_WAS_ON) + set(BUILD_SHARED_LIBS ON) + endif() endif() - target_compile_definitions(lammps PRIVATE -DFFT_HEFFTE "-DFFT_HEFFTE_${FFT_HEFFTE_BACKEND}") target_link_libraries(lammps PRIVATE Heffte::Heffte) endif() diff --git a/src/lmpfftsettings.h b/src/lmpfftsettings.h index 33628661840..10c57ee449f 100644 --- a/src/lmpfftsettings.h +++ b/src/lmpfftsettings.h @@ -34,7 +34,15 @@ // set strings for library info output -#if defined(FFT_FFTW3) +#if defined(FFT_HEFFTE) +#if defined(FFT_HEFFTE_FFTW) +#define LMP_FFT_LIB "HeFFTe(FFTW3)" +#elif defined(FFT_HEFFTE_MKL) +#define LMP_FFT_LIB "HeFFTe(MKL)" +#else +#define LMP_FFT_LIB "HeFFTe(native)" +#endif +#elif defined(FFT_FFTW3) #define LMP_FFT_LIB "FFTW3" #elif defined(FFT_MKL) #define LMP_FFT_LIB "MKL FFT" @@ -42,14 +50,6 @@ #define LMP_FFT_LIB "cuFFT" #elif defined(FFT_HIPFFT) #define LMP_FFT_LIB "hipFFT" -#elif defined(FFT_HEFFT) -#if defined(FFT_HEFFTE_FFTW) -#define LMP_FFT_LIB "HeFFTe (FFTW3)" -#elif defined(FFT_HEFFTE_MKL) -#define LMP_FFT_LIB "HeFFTe (MKL)" -#else -#define LMP_FFT_LIB "HeFFTe (native)" -#endif #else #define LMP_FFT_LIB "KISS FFT" #endif From d490d24bba21e513e67b3bab4317ee33e44db1dc Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 1 Feb 2024 11:30:59 -0500 Subject: [PATCH 19/31] update CMake script code to automatically chose heFFTe 
backed based on FFT choice --- cmake/Modules/Packages/KSPACE.cmake | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index 2ce136aefba..6060d4b1db1 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -48,10 +48,15 @@ endif() option(FFT_USE_HEFFTE "Use heFFTe as the distributed FFT engine, overrides the FFT option." OFF) if(FFT_USE_HEFFTE) - # if FFT_HEFFTE is enabled, switch the builtin FFT engine with Heffte - set(FFT_HEFFTE_BACKEND_VALUES FFTW MKL) - set(FFT_HEFFTE_BACKEND "${FFT}" CACHE STRING "Select heFFTe backend, e.g., FFTW or MKL") + # if FFT_HEFFTE is enabled, use the heFFTe parallel engine instead of the builtin fftMPI engine + + # map standard FFT choices to available heFFTe backends: FFTW3 -> FFTW, KISS -> BUILTIN + set(FFT_HEFFTE_BACKEND_VALUES FFTW MKL BUILTIN) + string(REPLACE FFTW3 FFTW FFT_HEFFTE_BACKEND_DEFAULT ${FFT}) + string(REPLACE KISS BUILTIN FFT_HEFFTE_BACKEND_DEFAULT ${FFT_HEFFTE_BACKEND_DEFAULT}) + set(FFT_HEFFTE_BACKEND "${FFT_HEFFTE_BACKEND_DEFAULT}" CACHE STRING "Select heFFTe backend, e.g., FFTW or MKL") set_property(CACHE FFT_HEFFTE_BACKEND PROPERTY STRINGS ${FFT_HEFFTE_BACKEND_VALUES}) + validate_option(FFT_HEFFTE_BACKEND FFT_HEFFTE_BACKEND_VALUES) if(FFT_HEFFTE_BACKEND STREQUAL "FFTW") # respect the backend choice, FFTW or MKL set(HEFFTE_COMPONENTS "FFTW") @@ -73,19 +78,21 @@ if(FFT_USE_HEFFTE) if(CMAKE_REQUEST_PIC) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() + set(Heffte_ENABLE_${FFT_HEFFTE_BACKEND} ON) include(FetchContent) FetchContent_Declare(HEFFTE_PROJECT # using v2.4.0 URL "https://github.com/icl-utk-edu/heffte/archive/refs/tags/v2.4.0.tar.gz" URL_HASH SHA256=02310fb4f9688df02f7181667e61c3adb7e38baf79611d80919d47452ff7881d ) FetchContent_Populate(HEFFTE_PROJECT) - # fixup git hash to show "(unknown)" to avoid compilation failures. It cannot derive the HeFFTe hash anyway. 
+ + # fixup git hash to show "(unknown)" to avoid compilation failures. file(READ ${heffte_project_SOURCE_DIR}/include/heffte_config.cmake.h HEFFTE_CFG_FILE_TEXT) string(REPLACE "@Heffte_GIT_HASH@" "(unknown)" HEFFTE_CFG_FILE_TEXT "${HEFFTE_CFG_FILE_TEXT}") file(WRITE ${heffte_project_SOURCE_DIR}/include/heffte_config.cmake.h "${HEFFTE_CFG_FILE_TEXT}") + add_subdirectory(${heffte_project_SOURCE_DIR} ${heffte_project_BINARY_DIR}) - add_library(Heffte::Heffte INTERFACE IMPORTED GLOBAL) - target_link_libraries(Heffte::Heffte INTERFACE Heffte) + add_library(Heffte::Heffte ALIAS Heffte) if(BUILD_SHARED_LIBS_WAS_ON) set(BUILD_SHARED_LIBS ON) endif() From 3c184aac849c9ad31030645165f1826e53df6c37 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 5 Feb 2024 16:45:01 -0500 Subject: [PATCH 20/31] add fft support to info command --- src/KOKKOS/Install.sh | 1 - src/info.cpp | 40 ++++++++++++++++++++++++++++++++++++++++ src/lmpfftsettings.h | 12 ++++++------ 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 112a2e947af..462c0cbe575 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -208,7 +208,6 @@ action kokkos_few.h action kokkos_type.h action kokkos.cpp action kokkos.h -action lmpfftsettings_kokkos.h lmpfftsettings.h action math_special_kokkos.cpp action math_special_kokkos.h action meam_dens_final_kokkos.h meam_dens_final.cpp diff --git a/src/info.cpp b/src/info.cpp index 49b77d60c90..aa3bbb2bcc4 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -97,6 +97,7 @@ enum {COMPUTES=1<<0, DUMP_STYLES=1<<24, COMMAND_STYLES=1<<25, ACCELERATOR=1<<26, + FFT=1<<27, ALL=~0}; static const int STYLES = ATOM_STYLES | INTEGRATE_STYLES | MINIMIZE_STYLES @@ -206,6 +207,9 @@ void Info::command(int narg, char **arg) } else if (strncmp(arg[idx],"accelerator",3) == 0) { flags |= ACCELERATOR; ++idx; + } else if (strncmp(arg[idx],"fft",3) == 0) { + flags |= FFT; + ++idx; } else if (strncmp(arg[idx],"styles",3) == 0) 
{ if (idx+1 < narg) { ++idx; @@ -400,6 +404,42 @@ void Info::command(int narg, char **arg) comm->procgrid[1], comm->procgrid[2]); } + if (flags & FFT) { + fputs("\nFFT information:\n",out); +#if defined(FFT_HEFFTE) + fputs("FFT engine = HeFFTe\n",out); +#if defined(FFT_HEFFTE_MKL) + fputs("FFT library = MKL\n", out); +#elif defined(FFT_HEFFTE_FFTW) + fputs("FFT library = FFTW\n", out); +#else + fputs("FFT library = (builtin)\n", out); +#endif +#else + fputs("FFT engine = mpiFFT\n",out); +#if defined(FFT_MKL) + fputs("FFT library = MKL\n", out); +#elif defined(FFT_FFTW3) + fputs("FFT library = FFTW3\n", out); +#else + fputs("FFT library = KISS\n", out); +#endif +#endif +#if defined(LMP_KOKKOS) + fputs("KOKKOS FFT engine = mpiFFT\n",out); +#if defined(FFT_KOKKOS_CUFFT) + fputs("KOKKOS FFT library = cuFFT\n", out); +#elif defined(FFT_KOKKOS_HIPFFT) + fputs("KOKKOS FFT library = hipFFT\n", out); +#elif defined(FFT_FFTW3) + fputs("KOKKOS FFT library = FFTW3\n", out); +#elif defined(FFT_KOKKOS_MKL) + fputs("KOKKOS FFT library = MKL\n", out); +#else + fputs("KOKKOS FFT library = KISS\n", out); +#endif +#endif + } if (flags & SYSTEM) { fputs("\nSystem information:\n",out); fmt::print(out,"Units = {}\n", update->unit_style); diff --git a/src/lmpfftsettings.h b/src/lmpfftsettings.h index 10c57ee449f..b4e4f15c6b5 100644 --- a/src/lmpfftsettings.h +++ b/src/lmpfftsettings.h @@ -40,7 +40,7 @@ #elif defined(FFT_HEFFTE_MKL) #define LMP_FFT_LIB "HeFFTe(MKL)" #else -#define LMP_FFT_LIB "HeFFTe(native)" +#define LMP_FFT_LIB "HeFFTe(builtin)" #endif #elif defined(FFT_FFTW3) #define LMP_FFT_LIB "FFTW3" @@ -55,14 +55,14 @@ #endif #ifdef LMP_KOKKOS -#if defined(FFT_KOKKOS_FFTW3) -#define LMP_FFT_KOKKOS_LIB "FFTW3" -#elif defined(FFT_KOKKOS_MKL) -#define LMP_FFT_KOKKOS_LIB "MKL FFT" -#elif defined(FFT_KOKKOS_CUFFT) +#if defined(FFT_KOKKOS_CUFFT) #define LMP_FFT_KOKKOS_LIB "cuFFT" #elif defined(FFT_KOKKOS_HIPFFT) #define LMP_FFT_KOKKOS_LIB "hipFFT" +#elif defined(FFT_KOKKOS_FFTW3) 
+#define LMP_FFT_KOKKOS_LIB "FFTW3" +#elif defined(FFT_KOKKOS_MKL) +#define LMP_FFT_KOKKOS_LIB "MKL FFT" #else #define LMP_FFT_KOKKOS_LIB "KISS FFT" #endif From efbc0e0579496d14bb88b3e26675b33241ed5403 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 5 Feb 2024 17:22:53 -0500 Subject: [PATCH 21/31] whitespace --- doc/src/kspace_style.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/kspace_style.rst b/doc/src/kspace_style.rst index 78d7380c01f..e7d5e93d72f 100644 --- a/doc/src/kspace_style.rst +++ b/doc/src/kspace_style.rst @@ -451,7 +451,7 @@ relative RMS error. assignment and force interpolation calculations, along with the FFTs themselves, on the GPU or (optionally) threaded on the CPU when using OpenMP and FFTW3. The specific FFT library is selected using - the FFT_KOKKOS CMake parameter. See the + the FFT_KOKKOS CMake parameter. See the :doc:`Build settings ` doc page for how to select a 3rd-party FFT library. From 11b2538cdbf4e9aecd072dddba54f0684a2a8b2e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 5 Feb 2024 17:32:45 -0500 Subject: [PATCH 22/31] print fft info also with -help flag, using single function to create string --- src/info.cpp | 98 +++++++++++++++++++++++++++++++++----------------- src/info.h | 1 + src/lammps.cpp | 6 +++- 3 files changed, 71 insertions(+), 34 deletions(-) diff --git a/src/info.cpp b/src/info.cpp index aa3bbb2bcc4..2b87452d729 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -34,6 +34,7 @@ #include "group.h" #include "improper.h" #include "input.h" +#include "lmpfftsettings.h" #include "modify.h" #include "neighbor.h" #include "output.h" @@ -406,40 +407,9 @@ void Info::command(int narg, char **arg) if (flags & FFT) { fputs("\nFFT information:\n",out); -#if defined(FFT_HEFFTE) - fputs("FFT engine = HeFFTe\n",out); -#if defined(FFT_HEFFTE_MKL) - fputs("FFT library = MKL\n", out); -#elif defined(FFT_HEFFTE_FFTW) - fputs("FFT library = FFTW\n", out); -#else - fputs("FFT library = 
(builtin)\n", out); -#endif -#else - fputs("FFT engine = mpiFFT\n",out); -#if defined(FFT_MKL) - fputs("FFT library = MKL\n", out); -#elif defined(FFT_FFTW3) - fputs("FFT library = FFTW3\n", out); -#else - fputs("FFT library = KISS\n", out); -#endif -#endif -#if defined(LMP_KOKKOS) - fputs("KOKKOS FFT engine = mpiFFT\n",out); -#if defined(FFT_KOKKOS_CUFFT) - fputs("KOKKOS FFT library = cuFFT\n", out); -#elif defined(FFT_KOKKOS_HIPFFT) - fputs("KOKKOS FFT library = hipFFT\n", out); -#elif defined(FFT_FFTW3) - fputs("KOKKOS FFT library = FFTW3\n", out); -#elif defined(FFT_KOKKOS_MKL) - fputs("KOKKOS FFT library = MKL\n", out); -#else - fputs("KOKKOS FFT library = KISS\n", out); -#endif -#endif + fputs(get_fft_info().c_str(),out); } + if (flags & SYSTEM) { fputs("\nSystem information:\n",out); fmt::print(out,"Units = {}\n", update->unit_style); @@ -1308,6 +1278,68 @@ std::string Info::get_accelerator_info(const std::string &package) /* ---------------------------------------------------------------------- */ +std::string Info::get_fft_info() +{ + std::string fft_info; +#if defined(FFT_SINGLE) + fft_info = "FFT precision = single\n"; +#else + fft_info = "FFT precision = double\n"; +#endif +#if defined(FFT_HEFFTE) + fft_info += "FFT engine = HeFFTe\n"; +#if defined(FFT_HEFFTE_MKL) + fft_info += "FFT library = MKL\n"; +#elif defined(FFT_HEFFTE_FFTW) + fft_info += "FFT library = FFTW\n"; +#else + fft_info += "FFT library = (builtin)\n"; +#endif +#else + fft_info += "FFT engine = mpiFFT\n"; +#if defined(FFT_MKL) +#if defined(FFT_MKL_THREADS) + fft_info += "FFT library = MKL with threads\n"; +#else + fft_info += "FFT library = MKL\n"; +#endif +#elif defined(FFT_FFTW3) +#if defined(FFT_FFTW_THREADS) + fft_info += "FFT library = FFTW3 with threads\n"; +#else + fft_info += "FFT library = FFTW3\n"; +#endif +#else + fft_info += "FFT library = KISS\n"; +#endif +#endif +#if defined(LMP_KOKKOS) + fft_info += "KOKKOS FFT engine = mpiFFT\n"; +#if defined(FFT_KOKKOS_CUFFT) + fft_info 
+= "KOKKOS FFT library = cuFFT\n"; +#elif defined(FFT_KOKKOS_HIPFFT) + fft_info += "KOKKOS FFT library = hipFFT\n"; +#elif defined(FFT_KOKKOS_FFTW3) +#if defined(FFT_KOKKOS_FFTW_THREADS) + fft_info += "KOKKOS FFT library = FFTW3 with threads\n"; +#else + fft_info += "KOKKOS FFT library = FFTW3\n"; +#endif +#elif defined(FFT_KOKKOS_MKL) +#if defined(FFT_KOKKOS_MKL_THREADS) + fft_info += "KOKKOS FFT library = MKL with threads\n"; +#else + fft_info += "KOKKOS FFT library = MKL\n"; +#endif +#else + fft_info += "KOKKOS FFT library = KISS\n"; +#endif +#endif + return fft_info; +} + +/* ---------------------------------------------------------------------- */ + void Info::get_memory_info(double *meminfo) { double bytes = 0; diff --git a/src/info.h b/src/info.h index c4230b063ea..8fd725abf6c 100644 --- a/src/info.h +++ b/src/info.h @@ -47,6 +47,7 @@ class Info : public Command { static bool has_package(const std::string &); static bool has_accelerator_feature(const std::string &, const std::string &, const std::string &); + static std::string get_fft_info(); static bool has_gpu_device(); static std::string get_gpu_device_info(); static std::string get_accelerator_info(const std::string &pkg = ""); diff --git a/src/lammps.cpp b/src/lammps.cpp index b3d2171152d..2a71714dcce 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -66,6 +66,7 @@ #include "lmpinstalledpkgs.h" #include "lmpgitversion.h" +#include "lmpfftsettings.h" #if defined(LAMMPS_UPDATE) #define UPDATE_STRING " - " LAMMPS_UPDATE @@ -1446,7 +1447,10 @@ void LAMMPS::print_config(FILE *fp) fmt::print(fp,"Compatible GPU present: {}\n\n",Info::has_gpu_device() ? 
"yes" : "no"); #endif - fputs("Active compile time flags:\n\n",fp); + fputs("FFT information:\n\n",fp); + fputs(Info::get_fft_info().c_str(),fp); + + fputs("\nActive compile time flags:\n\n",fp); if (Info::has_gzip_support()) fputs("-DLAMMPS_GZIP\n",fp); if (Info::has_png_support()) fputs("-DLAMMPS_PNG\n",fp); if (Info::has_jpeg_support()) fputs("-DLAMMPS_JPEG\n",fp); From de39bcc1fdc36fbee3f8192376469f541548260d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 5 Feb 2024 17:37:53 -0500 Subject: [PATCH 23/31] update documentation --- doc/src/Build_settings.rst | 17 +++++++++-------- doc/src/info.rst | 9 ++++++++- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index 949d3085105..a9a9eb25bf1 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -70,7 +70,7 @@ libraries and better pipelining for packing and communication. the FFT library selected by the FFT_KOKKOS variable applies. Otherwise, the FFT library selected by the FFT variable applies. The same FFT settings apply to both. FFT_KOKKOS must be compatible with the - Kokkos backend - for example, when using the CUDA backend of Kokkos, + Kokkos back end - for example, when using the CUDA back end of Kokkos, you must use either CUFFT or KISS. Usually these settings are all that is needed. If FFTW3 is @@ -186,9 +186,10 @@ The Intel MKL math library is part of the Intel compiler suite. It can be used with the Intel or GNU compiler (see the ``FFT_LIB`` setting above). -The CUFFT and HIPFFT FFT libraries are packaged with NVIDIA's CUDA and AMD's -HIP installations, respectively. These FFT libraries require the Kokkos acceleration -package to be enabled and the Kokkos backend to be GPU-resident (ie, HIP or CUDA). +The cuFFT and hipFFT FFT libraries are packaged with NVIDIA's CUDA and +AMD's HIP installations, respectively. 
These FFT libraries require the +Kokkos acceleration package to be enabled and the Kokkos back end to be +GPU-resident (i.e., HIP or CUDA). Performing 3d FFTs in parallel can be time-consuming due to data access and required communication. This cost can be reduced by performing @@ -201,11 +202,11 @@ generally less than the difference in precision. Using the ``-DFFT_SINGLE`` setting trades off a little accuracy for reduced memory use and parallel communication costs for transposing 3d FFT data. -When using ``-DFFT_SINGLE`` with FFTW3, you may need to build the FFTW -library a second time with support for single-precision. +When using ``-DFFT_SINGLE`` with FFTW3, you may need to ensure that +the FFTW3 installation includes support for single-precision. -For FFTW3, do the following, which should produce the additional -library ``libfftw3f.a`` or ``libfftw3f.so``\ . +When compiling FFTW3 from source, you can do the following, which should +produce the additional libraries ``libfftw3f.a`` and/or ``libfftw3f.so``\ . .. 
code-block:: bash diff --git a/doc/src/info.rst b/doc/src/info.rst index 958542e3c84..ac70034c302 100644 --- a/doc/src/info.rst +++ b/doc/src/info.rst @@ -10,7 +10,7 @@ Syntax info args -* args = one or more of the following keywords: *out*, *all*, *system*, *memory*, *communication*, *computes*, *dumps*, *fixes*, *groups*, *regions*, *variables*, *coeffs*, *styles*, *time*, *accelerator*, or *configuration* +* args = one or more of the following keywords: *out*, *all*, *system*, *memory*, *communication*, *computes*, *dumps*, *fixes*, *groups*, *regions*, *variables*, *coeffs*, *styles*, *time*, *accelerator*, *fft*, or *configuration* * *out* values = *screen*, *log*, *append* filename, *overwrite* filename * *styles* values = *all*, *angle*, *atom*, *bond*, *compute*, *command*, *dump*, *dihedral*, *fix*, *improper*, *integrate*, *kspace*, *minimize*, *pair*, *region* @@ -92,6 +92,13 @@ The *accelerator* category prints out information about compile time settings of included accelerator support for the GPU, KOKKOS, INTEL, and OPENMP packages. +.. versionadded:: TBD + +The *fft* category prints out information about the included 3d-FFT +support. This lists the 3d-FFT engine, the FFT precision, and the FFT library +used by the FFT engine. If the KOKKOS package is included, the settings +used for the KOKKOS package are displayed as well. + The *styles* category prints the list of styles available in the current LAMMPS binary. 
It supports one of the following options to control which category of styles is printed out: From 34cd8ad3bcc521ff023c4f3e332fee290ed73a3e Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 6 Feb 2024 11:01:42 -0700 Subject: [PATCH 24/31] Small cleanup --- src/KOKKOS/fft3d_kokkos.cpp | 48 ++++++------------- src/KOKKOS/fft3d_kokkos.h | 2 - src/KOKKOS/fftdata_kokkos.h | 4 +- src/KOKKOS/fix_acks2_reaxff_kokkos.cpp | 6 +-- src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 2 +- src/KOKKOS/fix_shake_kokkos.cpp | 8 ++-- src/KOKKOS/meam_dens_init_kokkos.h | 2 +- src/KOKKOS/meam_force_kokkos.h | 2 +- src/KOKKOS/pair_adp_kokkos.cpp | 6 +-- src/KOKKOS/pair_dpd_ext_kokkos.cpp | 4 +- src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp | 4 +- src/KOKKOS/pair_dpd_kokkos.cpp | 4 +- src/KOKKOS/pair_dpd_tstat_kokkos.cpp | 4 +- src/KOKKOS/pair_eam_alloy_kokkos.cpp | 8 ++-- src/KOKKOS/pair_eam_fs_kokkos.cpp | 8 ++-- src/KOKKOS/pair_eam_kokkos.cpp | 8 ++-- src/KOKKOS/pair_pace_extrapolation_kokkos.cpp | 4 +- src/KOKKOS/pair_pace_kokkos.cpp | 4 +- src/KOKKOS/pair_reaxff_kokkos.cpp | 14 +++--- src/KOKKOS/pair_snap_kokkos_impl.h | 4 +- src/KOKKOS/pair_sw_kokkos.cpp | 6 +-- src/KOKKOS/pair_tersoff_kokkos.cpp | 6 +-- src/KOKKOS/pair_tersoff_mod_kokkos.cpp | 6 +-- src/KOKKOS/pair_tersoff_zbl_kokkos.cpp | 6 +-- src/KOKKOS/remap_kokkos.cpp | 18 +++---- src/KOKKOS/remap_kokkos.h | 2 +- 26 files changed, 84 insertions(+), 106 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index c50af939a4d..d3814b204e3 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -15,6 +15,7 @@ /* ---------------------------------------------------------------------- Contributing authors: Stan Moore (SNL), Sam Mish (U.C. 
Davis), Nick Hagerty (ORNL) ------------------------------------------------------------------------- */ + #include "fft3d_kokkos.h" #include "error.h" @@ -37,7 +38,7 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int int out_ilo, int out_ihi, int out_jlo, int out_jhi, int out_klo, int out_khi, int scaled, int permute, int *nbuf, int usecollective, - int usecuda_aware) : + int usegpu_aware) : Pointers(lmp) { int nthreads = lmp->kokkos->nthreads; @@ -73,7 +74,7 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int plan = fft_3d_create_plan_kokkos(comm,nfast,nmid,nslow, in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi, - scaled,permute,nbuf,usecollective,nthreads,usecuda_aware); + scaled,permute,nbuf,usecollective,nthreads,usegpu_aware); if (plan == nullptr) error->one(FLERR,"Could not create 3d FFT plan"); } @@ -155,7 +156,7 @@ struct norm_functor { *(out_ptr++) *= norm; #elif defined(FFT_KOKKOS_MKL) d_out(i) *= norm; -#else // FFT_KISS +#else // FFT_KOKKOS_KISS d_out(i).re *= norm; d_out(i).im *= norm; #endif @@ -375,7 +376,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, 2 = permute twice = slow->fast, fast->mid, mid->slow nbuf returns size of internal storage buffers used by FFT usecollective use collective MPI operations for remapping data - usecuda_aware use CUDA-Aware MPI or not + usegpu_aware use GPU-Aware MPI or not ------------------------------------------------------------------------- */ template @@ -386,7 +387,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl int out_ilo, int out_ihi, int out_jlo, int out_jhi, int out_klo, int out_khi, int scaled, int permute, int *nbuf, int usecollective, - int nthreads, int usecuda_aware) + int nthreads, int usegpu_aware) { struct fft_plan_3d_kokkos *plan; int me,nprocs; @@ -418,7 +419,6 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl // not needed if all procs own entire fast axis initially 
// first indices = distribution after 1st set of FFTs - if (in_ilo == 0 && in_ihi == nfast-1) flag = 0; else flag = 1; @@ -444,7 +444,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl remapKK->remap_3d_create_plan_kokkos(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, first_ilo,first_ihi,first_jlo,first_jhi, first_klo,first_khi,2,0,0,FFT_PRECISION, - usecollective,usecuda_aware); + usecollective,usegpu_aware); if (plan->pre_plan == nullptr) return nullptr; } @@ -469,7 +469,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl first_klo,first_khi, second_ilo,second_ihi,second_jlo,second_jhi, second_klo,second_khi,2,1,0,FFT_PRECISION, - usecollective,usecuda_aware); + usecollective,usegpu_aware); if (plan->mid1_plan == nullptr) return nullptr; // 1d FFTs along mid axis @@ -510,7 +510,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl second_ilo,second_ihi, third_jlo,third_jhi,third_klo,third_khi, third_ilo,third_ihi,2,1,0,FFT_PRECISION, - usecollective,usecuda_aware); + usecollective,usegpu_aware); if (plan->mid2_plan == nullptr) return nullptr; // 1d FFTs along slow axis @@ -538,7 +538,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl third_jlo,third_jhi, out_klo,out_khi,out_ilo,out_ihi, out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION, - usecollective,usecuda_aware); + usecollective,usegpu_aware); if (plan->post_plan == nullptr) return nullptr; } @@ -714,17 +714,17 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl hipfftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, - HIPFFT_KOKKOS_TYPE,plan->total1/plan->length1); + HIPFFT_TYPE,plan->total1/plan->length1); hipfftPlanMany(&(plan->plan_mid), 1, &nmid, &nmid,1,plan->length2, &nmid,1,plan->length2, - HIPFFT_KOKKOS_TYPE,plan->total2/plan->length2); + HIPFFT_TYPE,plan->total2/plan->length2); hipfftPlanMany(&(plan->plan_slow), 1, &nslow, &nslow,1,plan->length3, &nslow,1,plan->length3, - 
HIPFFT_KOKKOS_TYPE,plan->total3/plan->length3); + HIPFFT_TYPE,plan->total3/plan->length3); #else /* FFT_KOKKOS_KISS */ @@ -805,27 +805,6 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk delete remapKK; } -/* ---------------------------------------------------------------------- - divide n into 2 factors of as equal size as possible -------------------------------------------------------------------------- */ - -template -void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) -{ - int n1,n2,facmax; - - facmax = static_cast (sqrt((double) n)); - - for (n1 = facmax; n1 > 0; n1--) { - n2 = n/n1; - if (n1*n2 == n) { - *factor1 = n1; - *factor2 = n2; - return; - } - } -} - /* ---------------------------------------------------------------------- perform just the 1d FFTs needed by a 3d FFT, no data movement used for timing purposes @@ -855,6 +834,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // fftw3 and Dfti in MKL encode the number of transforms // into the plan, so we cannot operate on a smaller data set + #if defined(FFT_KOKKOS_MKL) || defined(FFT_KOKKOS_FFTW3) if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize)) return; diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index ed49c4b1eed..a7349878ecf 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -106,8 +106,6 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); - - void bifactor(int, int *, int *); }; } diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 7f900002e5d..c0a223478c8 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -108,11 +108,11 @@ #include #if defined(FFT_SINGLE) #define hipfftExec hipfftExecC2C - #define HIPFFT_KOKKOS_TYPE HIPFFT_C2C + #define HIPFFT_TYPE HIPFFT_C2C typedef hipfftComplex 
FFT_KOKKOS_DATA; #else #define hipfftExec hipfftExecZ2Z - #define HIPFFT_KOKKOS_TYPE HIPFFT_Z2Z + #define HIPFFT_TYPE HIPFFT_Z2Z typedef hipfftDoubleComplex FFT_KOKKOS_DATA; #endif #else diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp index 5256b7f4a49..b1b98519645 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp @@ -866,7 +866,7 @@ template KOKKOS_INLINE_FUNCTION void FixACKS2ReaxFFKokkos::compute_x_item(int ii, int &m_fill, const bool &final) const { - // The X_diag array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The X_diag array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_X_diag = ScatterViewHelper,decltype(dup_X_diag),decltype(ndup_X_diag)>::get(dup_X_diag,ndup_X_diag); auto a_X_diag = v_X_diag.template access>(); @@ -944,7 +944,7 @@ void FixACKS2ReaxFFKokkos::compute_x_team( const typename Kokkos::TeamPolicy::member_type &team, int atoms_per_team, int vector_length) const { - // The X_diag array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The X_diag array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_X_diag = ScatterViewHelper,decltype(dup_X_diag),decltype(ndup_X_diag)>::get(dup_X_diag,ndup_X_diag); auto a_X_diag = v_X_diag.template access>(); @@ -1458,7 +1458,7 @@ template KOKKOS_INLINE_FUNCTION void FixACKS2ReaxFFKokkos::operator() (TagACKS2SparseMatvec3_Half, const int &ii) const { - // The bb array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The bb array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_bb = ScatterViewHelper,decltype(dup_bb),decltype(ndup_bb)>::get(dup_bb,ndup_bb); auto a_bb = v_bb.template access>(); diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index 06485eb1ccd..2cf6bcfba7a 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ 
b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -928,7 +928,7 @@ void FixQEqReaxFFKokkos::operator()(TagQEqSparseMatvec2_Half,decltype(dup_o),decltype(ndup_o)>::get(dup_o,ndup_o); auto a_o = v_o.template access>(); diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index 81489142db9..5705163f64d 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -643,7 +643,7 @@ KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake(int ilist, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -753,7 +753,7 @@ KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake3(int ilist, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -933,7 +933,7 @@ KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake4(int ilist, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -1190,7 +1190,7 @@ KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake3angle(int ilist, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); diff --git 
a/src/KOKKOS/meam_dens_init_kokkos.h b/src/KOKKOS/meam_dens_init_kokkos.h index 60bb6553d80..68e69430fd9 100644 --- a/src/KOKKOS/meam_dens_init_kokkos.h +++ b/src/KOKKOS/meam_dens_init_kokkos.h @@ -481,7 +481,7 @@ void MEAMKokkos::calc_rho1(int i, int /*ntype*/, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh, int offset) const { - // The rho0, etc. arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The rho0, etc. arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_rho0 = ScatterViewHelper,decltype(dup_rho0),decltype(ndup_rho0)>::get(dup_rho0,ndup_rho0); auto a_rho0 = v_rho0.template access>(); auto v_arho2b = ScatterViewHelper,decltype(dup_arho2b),decltype(ndup_arho2b)>::get(dup_arho2b,ndup_arho2b); diff --git a/src/KOKKOS/meam_force_kokkos.h b/src/KOKKOS/meam_force_kokkos.h index d086230fc70..fec923f5b26 100644 --- a/src/KOKKOS/meam_force_kokkos.h +++ b/src/KOKKOS/meam_force_kokkos.h @@ -131,7 +131,7 @@ KOKKOS_INLINE_FUNCTION void MEAMKokkos::operator()(TagMEAMForce, decltype(dup_f), decltype(ndup_f)>::get( diff --git a/src/KOKKOS/pair_adp_kokkos.cpp b/src/KOKKOS/pair_adp_kokkos.cpp index 86ba3d267ee..d02edc43ab0 100644 --- a/src/KOKKOS/pair_adp_kokkos.cpp +++ b/src/KOKKOS/pair_adp_kokkos.cpp @@ -671,7 +671,7 @@ void PairADPKokkos::operator()(TagPairADPKernelA,decltype(dup_rho),decltype(ndup_rho)>::get(dup_rho,ndup_rho); auto a_rho = v_rho.template access>(); @@ -929,7 +929,7 @@ template KOKKOS_INLINE_FUNCTION void PairADPKokkos::operator()(TagPairADPKernelC, const int &ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -1087,7 +1087,7 @@ void PairADPKokkos::ev_tally_xyz(EV_FLOAT &ev, 
const int &i, const i const int EFLAG = eflag; const int VFLAG = vflag_either; - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); diff --git a/src/KOKKOS/pair_dpd_ext_kokkos.cpp b/src/KOKKOS/pair_dpd_ext_kokkos.cpp index 3624208c6b4..636235d1c81 100644 --- a/src/KOKKOS/pair_dpd_ext_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_ext_kokkos.cpp @@ -228,7 +228,7 @@ template KOKKOS_INLINE_FUNCTION void PairDPDExtKokkos::operator() (TagDPDExtKokkos, const int &ii, EV_FLOAT &ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -354,7 +354,7 @@ void PairDPDExtKokkos::ev_tally_xyz(EV_FLOAT &ev, const int &i, cons const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); diff --git a/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp b/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp index 9808c53856e..213b344fbba 100644 --- a/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp @@ -232,7 +232,7 @@ template KOKKOS_INLINE_FUNCTION void PairDPDExtTstatKokkos::operator() (TagDPDExtTstatKokkos, const int &ii, EV_FLOAT &ev) const { - // The 
f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -346,7 +346,7 @@ void PairDPDExtTstatKokkos::v_tally_xyz(EV_FLOAT &ev, const int &i, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); diff --git a/src/KOKKOS/pair_dpd_kokkos.cpp b/src/KOKKOS/pair_dpd_kokkos.cpp index 5dca219cdf1..f888b5f6ce1 100644 --- a/src/KOKKOS/pair_dpd_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_kokkos.cpp @@ -228,7 +228,7 @@ template KOKKOS_INLINE_FUNCTION void PairDPDKokkos::operator() (TagDPDKokkos, const int &ii, EV_FLOAT &ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -319,7 +319,7 @@ void PairDPDKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int & const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); diff --git a/src/KOKKOS/pair_dpd_tstat_kokkos.cpp b/src/KOKKOS/pair_dpd_tstat_kokkos.cpp 
index 78cc862ac03..63dbda3b59e 100644 --- a/src/KOKKOS/pair_dpd_tstat_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_tstat_kokkos.cpp @@ -231,7 +231,7 @@ template KOKKOS_INLINE_FUNCTION void PairDPDTstatKokkos::operator() (TagDPDTstatKokkos, const int &ii, EV_FLOAT &ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -312,7 +312,7 @@ void PairDPDTstatKokkos::v_tally(EV_FLOAT &ev, const int &i, const i const F_FLOAT &dely, const F_FLOAT &delz) const { - // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index 0dfe56c3650..93ed9fc620d 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -566,7 +566,7 @@ void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelA,decltype(dup_rho),decltype(ndup_rho)>::get(dup_rho,ndup_rho); auto a_rho = v_rho.template access>(); @@ -733,7 +733,7 @@ template KOKKOS_INLINE_FUNCTION void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelC, const int &ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -943,7 +943,7 @@ void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelC,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template 
access>(); @@ -1076,7 +1076,7 @@ void PairEAMAlloyKokkos::ev_tally(EV_FLOAT &ev, const int &i, const const int EFLAG = eflag; const int VFLAG = vflag_either; - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index 58ff615c04d..5dee6013021 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -566,7 +566,7 @@ void PairEAMFSKokkos::operator()(TagPairEAMFSKernelA,decltype(dup_rho),decltype(ndup_rho)>::get(dup_rho,ndup_rho); auto a_rho = v_rho.template access>(); @@ -733,7 +733,7 @@ template KOKKOS_INLINE_FUNCTION void PairEAMFSKokkos::operator()(TagPairEAMFSKernelC, const int &ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -943,7 +943,7 @@ void PairEAMFSKokkos::operator()(TagPairEAMFSKernelC,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -1076,7 +1076,7 @@ void PairEAMFSKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int const int EFLAG = eflag; const int VFLAG = vflag_either; - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); diff --git a/src/KOKKOS/pair_eam_kokkos.cpp 
b/src/KOKKOS/pair_eam_kokkos.cpp index 864f7360660..32f4afe2256 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -561,7 +561,7 @@ void PairEAMKokkos::operator()(TagPairEAMKernelA,decltype(dup_rho),decltype(ndup_rho)>::get(dup_rho,ndup_rho); auto a_rho = v_rho.template access>(); @@ -728,7 +728,7 @@ template KOKKOS_INLINE_FUNCTION void PairEAMKokkos::operator()(TagPairEAMKernelC, const int &ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -938,7 +938,7 @@ void PairEAMKokkos::operator()(TagPairEAMKernelC,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -1071,7 +1071,7 @@ void PairEAMKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int & const int EFLAG = eflag; const int VFLAG = vflag_either; - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); diff --git a/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp index ef747ef95c7..e7d376c8707 100644 --- a/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp +++ b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp @@ -1652,7 +1652,7 @@ template KOKKOS_INLINE_FUNCTION void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeForce, const int& ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial const auto v_f = 
ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); const auto a_f = v_f.template access>(); @@ -1721,7 +1721,7 @@ void PairPACEExtrapolationKokkos::v_tally_xyz(EV_FLOAT &ev, const in const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); diff --git a/src/KOKKOS/pair_pace_kokkos.cpp b/src/KOKKOS/pair_pace_kokkos.cpp index 4046649375b..4407d1231ef 100644 --- a/src/KOKKOS/pair_pace_kokkos.cpp +++ b/src/KOKKOS/pair_pace_kokkos.cpp @@ -1561,7 +1561,7 @@ template KOKKOS_INLINE_FUNCTION void PairPACEKokkos::operator() (TagPairPACEComputeForce, const int& ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial const auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); const auto a_f = v_f.template access>(); @@ -1630,7 +1630,7 @@ void PairPACEKokkos::v_tally_xyz(EV_FLOAT &ev, const int &i, const i const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp index 505681acb3d..7dd86e07a9f 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.cpp +++ 
b/src/KOKKOS/pair_reaxff_kokkos.cpp @@ -1149,7 +1149,7 @@ template KOKKOS_INLINE_FUNCTION void PairReaxFFKokkos::operator()(TagPairReaxComputeLJCoulomb, const int &ii, EV_FLOAT_REAX& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -1345,7 +1345,7 @@ template KOKKOS_INLINE_FUNCTION void PairReaxFFKokkos::operator()(TagPairReaxComputeTabulatedLJCoulomb, const int &ii, EV_FLOAT_REAX& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -3834,7 +3834,7 @@ void PairReaxFFKokkos::ev_tally(EV_FLOAT_REAX &ev, const int &i, con const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); @@ -3890,7 +3890,7 @@ KOKKOS_INLINE_FUNCTION void PairReaxFFKokkos::e_tally(EV_FLOAT_REAX & /*ev*/, const int &i, const int &j, const F_FLOAT &epair) const { - // The eatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); @@ -3908,7 +3908,7 @@ KOKKOS_INLINE_FUNCTION 
void PairReaxFFKokkos::e_tally_single(EV_FLOAT_REAX & /*ev*/, const int &i, const F_FLOAT &epair) const { - // The eatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); @@ -3959,7 +3959,7 @@ KOKKOS_INLINE_FUNCTION void PairReaxFFKokkos::v_tally3(EV_FLOAT_REAX &ev, const int &i, const int &j, const int &k, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const { - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); @@ -3999,7 +3999,7 @@ KOKKOS_INLINE_FUNCTION void PairReaxFFKokkos::v_tally4(EV_FLOAT_REAX &ev, const int &i, const int &j, const int &k, const int &l, F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *dril, F_FLOAT *drjl, F_FLOAT *drkl) const { - // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial F_FLOAT v[6]; diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 7b9fda60dbd..839240c62f3 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -1265,7 +1265,7 @@ template KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPComputeForce, const int& ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = 
ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -1365,7 +1365,7 @@ void PairSNAPKokkos::v_tally_xyz(EV_FLOAT const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp index 294b451e7ec..69c34fdc932 100644 --- a/src/KOKKOS/pair_sw_kokkos.cpp +++ b/src/KOKKOS/pair_sw_kokkos.cpp @@ -235,7 +235,7 @@ template KOKKOS_INLINE_FUNCTION void PairSWKokkos::operator()(TagPairSWCompute, const int &ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); @@ -532,7 +532,7 @@ void PairSWKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j const F_FLOAT &dely, const F_FLOAT &delz) const { - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); @@ -596,7 +596,7 @@ void PairSWKokkos::ev_tally3(EV_FLOAT &ev, const int &i, const int & { F_FLOAT epairthird,v[6]; - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and 
neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index e860b0232da..1a0d45e4357 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -336,7 +336,7 @@ template KOKKOS_INLINE_FUNCTION void PairTersoffKokkos::tersoff_compute(const int &ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial const auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); const auto a_f = v_f.template access>(); @@ -1003,7 +1003,7 @@ void PairTersoffKokkos::ev_tally(EV_FLOAT &ev, const int &i, const i const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); @@ -1061,7 +1061,7 @@ void PairTersoffKokkos::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const { - // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp index 02a51af3d7e..b941755d4b5 100644 --- 
a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -326,7 +326,7 @@ template KOKKOS_INLINE_FUNCTION void PairTersoffMODKokkos::tersoff_mod_compute(const int &ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial const auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); const auto a_f = v_f.template access>(); @@ -899,7 +899,7 @@ void PairTersoffMODKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); @@ -956,7 +956,7 @@ KOKKOS_INLINE_FUNCTION void PairTersoffMODKokkos::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const { - // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index 8bad880a4f9..08d6cb17d7f 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -339,7 +339,7 @@ template KOKKOS_INLINE_FUNCTION void PairTersoffZBLKokkos::tersoff_zbl_compute(const int &ii, EV_FLOAT& ev) const { - // The f array is duplicated for OpenMP, atomic for CUDA, 
and neither for Serial + // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial const auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); const auto a_f = v_f.template access>(); @@ -963,7 +963,7 @@ void PairTersoffZBLKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The eatom and vatom arrays are duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); @@ -1021,7 +1021,7 @@ void PairTersoffZBLKokkos::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const { - // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index efc6742a259..8cd3aec6ddb 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -38,13 +38,13 @@ RemapKokkos::RemapKokkos(LAMMPS *lmp, MPI_Comm comm, int out_klo, int out_khi, int nqty, int permute, int memory, int precision, int usecollective, - int usecuda_aware) : Pointers(lmp) + int usegpu_aware) : Pointers(lmp) { plan = remap_3d_create_plan_kokkos(comm, in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi, nqty,permute,memory,precision,usecollective, - usecuda_aware); + usegpu_aware); if (plan == nullptr) error->one(FLERR,"Could not create 3d remap plan"); } @@ -121,7 
+121,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d // post all recvs into scratch space FFT_SCALAR* v_scratch = d_scratch.data(); - if (!plan->usecuda_aware) { + if (!plan->usegpu_aware) { plan->h_scratch = Kokkos::create_mirror_view(d_scratch); v_scratch = plan->h_scratch.data(); } @@ -134,7 +134,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d } FFT_SCALAR* v_sendbuf = plan->d_sendbuf.data(); - if (!plan->usecuda_aware) { + if (!plan->usegpu_aware) { plan->h_sendbuf = Kokkos::create_mirror_view(plan->d_sendbuf); v_sendbuf = plan->h_sendbuf.data(); } @@ -146,7 +146,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d plan->pack(d_in,in_offset, plan->d_sendbuf,0,&plan->packplan[isend]); - if (!plan->usecuda_aware) + if (!plan->usegpu_aware) Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, @@ -178,7 +178,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d int scratch_offset = plan->recv_bufloc[irecv]; int out_offset = plan->recv_offset[irecv]; - if (!plan->usecuda_aware) + if (!plan->usegpu_aware) Kokkos::deep_copy(d_scratch,plan->h_scratch); plan->unpack(d_scratch,scratch_offset, @@ -209,7 +209,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d 1 = single precision (4 bytes per datum) 2 = double precision (8 bytes per datum) usecollective whether to use collective MPI or point-to-point - usecuda_aware whether to use CUDA-Aware MPI or not + usegpu_aware whether to use GPU-Aware MPI or not ------------------------------------------------------------------------- */ template @@ -220,7 +220,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat int out_ilo, int out_ihi, int out_jlo, int out_jhi, int out_klo, int out_khi, int nqty, int permute, int memory, int /*precision*/, - int usecollective, int usecuda_aware) + int usecollective, int usegpu_aware) { struct remap_plan_3d_kokkos 
*plan; @@ -238,7 +238,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat plan = new struct remap_plan_3d_kokkos; if (plan == nullptr) return nullptr; plan->usecollective = usecollective; - plan->usecuda_aware = usecuda_aware; + plan->usegpu_aware = usegpu_aware; // store parameters in local data structs diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index a62c14f00b9..77a3b1a37af 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -54,7 +54,7 @@ struct remap_plan_3d_kokkos { int usecollective; // use collective or point-to-point MPI int commringlen; // length of commringlist int *commringlist; // ranks on communication ring of this plan - int usecuda_aware; // use CUDA-Aware MPI or not + int usegpu_aware; // use GPU-Aware MPI or not }; template From 431788db496392af3505612a507e0ab2214e2b7d Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 6 Feb 2024 11:31:16 -0700 Subject: [PATCH 25/31] Need bifactor duplicated code --- src/KOKKOS/fft3d_kokkos.cpp | 21 +++++++++++++++++++++ src/KOKKOS/fft3d_kokkos.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index d3814b204e3..1f39e173263 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -805,6 +805,27 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk delete remapKK; } +/* ---------------------------------------------------------------------- + divide n into 2 factors of as equal size as possible +------------------------------------------------------------------------- */ + +template +void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) +{ + int n1,n2,facmax; + + facmax = static_cast (sqrt((double) n)); + + for (n1 = facmax; n1 > 0; n1--) { + n2 = n/n1; + if (n1*n2 == n) { + *factor1 = n1; + *factor2 = n2; + return; + } + } +} + /* ---------------------------------------------------------------------- perform just the 1d FFTs needed by a 3d 
FFT, no data movement used for timing purposes diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index a7349878ecf..ed49c4b1eed 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -106,6 +106,8 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + + void bifactor(int, int *, int *); }; } From 46b39d2ca4877c8fefb20a6ff8ab5b78ed597541 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 6 Feb 2024 12:07:48 -0700 Subject: [PATCH 26/31] Add warning --- src/KOKKOS/fft3d_kokkos.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 1f39e173263..e9ab095de99 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -69,6 +69,13 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int if (stack_size < 2048) cudaDeviceSetLimit(cudaLimitStackSize,2048); #endif + + #if defined (LMP_KOKKOS_GPU) + int me; + MPI_Comm_rank(comm,&me); + if (me == 0) + lmp->warning(FLERR,"Using default KISS FFT with Kokkos GPU backends may give suboptimal performance"); + #endif #endif plan = fft_3d_create_plan_kokkos(comm,nfast,nmid,nslow, From beaab56826b9ed9f967f30ba6b717dea81fa2428 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 6 Feb 2024 12:08:00 -0700 Subject: [PATCH 27/31] Move define checks --- src/KOKKOS/pppm_kokkos.h | 30 ------------------------------ src/lmpfftsettings.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 14d4670dbdf..d29f036d450 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -33,36 +33,6 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos); // clang-format off -// fix up FFT defines for KOKKOS with CUDA and HIP - -#ifdef KOKKOS_ENABLE_CUDA 
-# if defined(FFT_KOKKOS_FFTW) -# undef FFT_KOKKOS_FFTW -# endif -# if defined(FFT_KOKKOS_FFTW3) -# undef FFT_KOKKOS_FFTW3 -# endif -# if defined(FFT_KOKKOS_MKL) -# undef FFT_KOKKOS_MKL -# endif -# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) -# define FFT_KOKKOS_KISSFFT -# endif -#elif defined(KOKKOS_ENABLE_HIP) -# if defined(FFT_KOKKOS_FFTW) -# undef FFT_KOKKOS_FFTW -# endif -# if defined(FFT_KOKKOS_FFTW3) -# undef FFT_KOKKOS_FFTW3 -# endif -# if defined(FFT_KOKKOS_MKL) -# undef FFT_KOKKOS_MKL -# endif -# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) -# define FFT_KOKKOS_KISSFFT -# endif -#endif - #include "pppm.h" namespace LAMMPS_NS { diff --git a/src/lmpfftsettings.h b/src/lmpfftsettings.h index b4e4f15c6b5..56314bf9c5b 100644 --- a/src/lmpfftsettings.h +++ b/src/lmpfftsettings.h @@ -20,6 +20,7 @@ #ifdef FFT_FFTW #ifndef FFT_FFTW3 +#undef FFT_FFTW #define FFT_FFTW3 #endif #endif @@ -27,6 +28,7 @@ #ifdef LMP_KOKKOS #ifdef FFT_KOKKOS_FFTW #ifndef FFT_KOKKOS_FFTW3 +#undef FFT_KOKKOS_FFTW #define FFT_KOKKOS_FFTW3 #endif #endif @@ -55,6 +57,37 @@ #endif #ifdef LMP_KOKKOS + +// fix up FFT defines for KOKKOS with CUDA and HIP + +#ifdef KOKKOS_ENABLE_CUDA +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW +# endif +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 +# endif +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL +# endif +# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT +# endif +#elif defined(KOKKOS_ENABLE_HIP) +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW +# endif +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 +# endif +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL +# endif +# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) +# define FFT_KOKKOS_KISSFFT +# endif +#endif + #if defined(FFT_KOKKOS_CUFFT) #define LMP_FFT_KOKKOS_LIB "cuFFT" #elif defined(FFT_KOKKOS_HIPFFT) From 5adbc09b71f285d7781906cd2a540c19a4c1cc10 Mon Sep 
17 00:00:00 2001 From: Stan Moore Date: Tue, 6 Feb 2024 12:33:01 -0700 Subject: [PATCH 28/31] Move warning to avoid multiple instances --- src/KOKKOS/fft3d_kokkos.cpp | 7 ------- src/KOKKOS/pppm_kokkos.cpp | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index e9ab095de99..1f39e173263 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -69,13 +69,6 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int if (stack_size < 2048) cudaDeviceSetLimit(cudaLimitStackSize,2048); #endif - - #if defined (LMP_KOKKOS_GPU) - int me; - MPI_Comm_rank(comm,&me); - if (me == 0) - lmp->warning(FLERR,"Using default KISS FFT with Kokkos GPU backends may give suboptimal performance"); - #endif #endif plan = fft_3d_create_plan_kokkos(comm,nfast,nmid,nslow, diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index b83c7e03135..6a2618642bb 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -20,6 +20,7 @@ #include "atom_kokkos.h" #include "atom_masks.h" +#include "comm.h" #include "domain.h" #include "error.h" #include "fft3d_kokkos.h" @@ -105,6 +106,13 @@ PPPMKokkos::PPPMKokkos(LAMMPS *lmp) : PPPM(lmp) fft1 = nullptr; fft2 = nullptr; remap = nullptr; + +#if defined (LMP_KOKKOS_GPU) + #if defined(FFT_KOKKOS_KISSFFT) + if (comm->me == 0) + error->warning(FLERR,"Using default KISS FFT with Kokkos GPU backends may give suboptimal performance"); + #endif +#endif } /* ---------------------------------------------------------------------- From 19a5de1be6c1d1d7c02ee2a430f99d5c6904f168 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 6 Feb 2024 15:11:31 -0500 Subject: [PATCH 29/31] silence compiler warning --- src/KSPACE/msm.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp index 57d16006d11..60bb768911e 100644 --- a/src/KSPACE/msm.cpp +++ b/src/KSPACE/msm.cpp @@ 
-1602,8 +1602,7 @@ void MSM::direct(int n) qtmp = qgridn[icz][icy][icx]; // charge on center grid point esum = 0.0; - if (vflag_either && !scalar_pressure_flag) - v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0; + v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0; // use hemisphere to avoid double computation of pair-wise // interactions in direct sum (no computations in -z direction) From 2f4dbdceb6465d2180cd0c5b399a999708409b42 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 6 Feb 2024 13:28:33 -0700 Subject: [PATCH 30/31] Remove redundant checks --- src/KOKKOS/fftdata_kokkos.h | 48 ------------------------------------- src/lmpfftsettings.h | 26 ++++++++++++++------ 2 files changed, 19 insertions(+), 55 deletions(-) diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index c0a223478c8..f9946391063 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -27,54 +27,6 @@ // ------------------------------------------------------------------------- -// with KOKKOS in CUDA or HIP mode we can only have -// CUFFT/HIPFFT or KISSFFT, thus undefine all other -// FFTs here, since they may be valid in fft3d.cpp - -#ifdef KOKKOS_ENABLE_CUDA -# if defined(FFT_KOKKOS_FFTW) -# undef FFT_KOKKOS_FFTW -# endif -# if defined(FFT_KOKKOS_FFTW3) -# undef FFT_KOKKOS_FFTW3 -# endif -# if defined(FFT_KOKKOS_MKL) -# undef FFT_KOKKOS_MKL -# endif -# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) -# define FFT_KOKKOS_KISSFFT -# endif -#elif defined(KOKKOS_ENABLE_HIP) -# if defined(FFT_KOKKOS_FFTW) -# undef FFT_KOKKOS_FFTW -# endif -# if defined(FFT_KOKKOS_FFTW3) -# undef FFT_KOKKOS_FFTW3 -# endif -# if defined(FFT_KOKKOS_MKL) -# undef FFT_KOKKOS_MKL -# endif -# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) -# define FFT_KOKKOS_KISSFFT -# endif -#else -# if defined(FFT_KOKKOS_CUFFT) -# error "Must enable CUDA with KOKKOS to use -DFFT_KOKKOS_CUFFT" -# endif -# if defined(FFT_KOKKOS_HIPFFT) -# error "Must 
enable HIP with KOKKOS to use -DFFT_KOKKOS_HIPFFT" -# endif -// if user set FFTW, it means FFTW3 -# ifdef FFT_KOKKOS_FFTW -# define FFT_KOKKOS_FFTW3 -# endif -# ifdef FFT_KOKKOS_FFTW_THREADS -# if !defined(FFT_KOKKOS_FFTW3) -# error "Must use -DFFT_KOKKOS_FFTW3 with -DFFT_KOKKOS_FFTW_THREADS" -# endif -# endif -#endif - #if defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" #if defined(FFT_SINGLE) diff --git a/src/lmpfftsettings.h b/src/lmpfftsettings.h index 56314bf9c5b..b1a4cffa002 100644 --- a/src/lmpfftsettings.h +++ b/src/lmpfftsettings.h @@ -26,12 +26,15 @@ #endif #ifdef LMP_KOKKOS -#ifdef FFT_KOKKOS_FFTW -#ifndef FFT_KOKKOS_FFTW3 -#undef FFT_KOKKOS_FFTW -#define FFT_KOKKOS_FFTW3 -#endif -#endif +# ifdef FFT_KOKKOS_FFTW +# undef FFT_KOKKOS_FFTW +# define FFT_KOKKOS_FFTW3 +# endif +# ifdef FFT_KOKKOS_FFTW_THREADS +# if !defined(FFT_KOKKOS_FFTW3) +# error "Must use -DFFT_KOKKOS_FFTW3 with -DFFT_KOKKOS_FFTW_THREADS" +# endif +# endif #endif // set strings for library info output @@ -58,7 +61,9 @@ #ifdef LMP_KOKKOS -// fix up FFT defines for KOKKOS with CUDA and HIP +// with KOKKOS in CUDA or HIP mode we can only have +// CUFFT/HIPFFT or KISSFFT, thus undefine all other +// FFTs here #ifdef KOKKOS_ENABLE_CUDA # if defined(FFT_KOKKOS_FFTW) @@ -86,6 +91,13 @@ # if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) # define FFT_KOKKOS_KISSFFT # endif +#else +# if defined(FFT_KOKKOS_CUFFT) +# error "Must enable CUDA with KOKKOS to use -DFFT_KOKKOS_CUFFT" +# endif +# if defined(FFT_KOKKOS_HIPFFT) +# error "Must enable HIP with KOKKOS to use -DFFT_KOKKOS_HIPFFT" +# endif #endif #if defined(FFT_KOKKOS_CUFFT) From c1024c811f128d21da3bddc0bbf7159141a3b565 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 6 Feb 2024 14:05:04 -0700 Subject: [PATCH 31/31] Fix name issue --- src/KOKKOS/fft3d_kokkos.cpp | 6 +++--- src/KOKKOS/fft3d_kokkos.h | 2 +- src/KOKKOS/fftdata_kokkos.h | 6 +++--- src/KOKKOS/pppm_kokkos.cpp | 2 +- src/lmpfftsettings.h | 10 +++++----- 5 files 
changed, 13 insertions(+), 13 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 1f39e173263..1610ae9b4e0 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -58,7 +58,7 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs"); -#elif defined(FFT_KOKKOS_KISSFFT) +#elif defined(FFT_KOKKOS_KISS) // The compiler can't statically determine the stack size needed for // recursive function calls in KISS FFT and the default per-thread // stack size on GPUs needs to be increased to prevent stack overflows @@ -163,7 +163,7 @@ struct norm_functor { } }; -#ifdef FFT_KOKKOS_KISSFFT +#ifdef FFT_KOKKOS_KISS template struct kiss_fft_functor { public: @@ -797,7 +797,7 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk FFTW_API(cleanup_threads)(); #endif -#elif defined (FFT_KOKKOS_KISSFFT) +#elif defined (FFT_KOKKOS_KISS) delete kissfftKK; #endif diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index ed49c4b1eed..48b0fd76de8 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -92,7 +92,7 @@ class FFT3dKokkos : protected Pointers { struct fft_plan_3d_kokkos *plan; RemapKokkos *remapKK; -#ifdef FFT_KOKKOS_KISSFFT +#ifdef FFT_KOKKOS_KISS KissFFTKokkos *kissfftKK; #endif diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index f9946391063..439a914e3ae 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -77,8 +77,8 @@ kiss_fft_scalar re; kiss_fft_scalar im; } FFT_KOKKOS_DATA; - #ifndef FFT_KOKKOS_KISSFFT - #define FFT_KOKKOS_KISSFFT + #ifndef FFT_KOKKOS_KISS + #define FFT_KOKKOS_KISS #endif #endif @@ -159,7 +159,7 @@ typedef struct FFTArrayTypes FFT_DAT; typedef struct FFTArrayTypes FFT_HAT; -#if defined(FFT_KOKKOS_KISSFFT) +#if defined(FFT_KOKKOS_KISS) #include "kissfft_kokkos.h" 
// uses t_FFT_DATA_1d, needs to come last #endif diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 6a2618642bb..73e2c1f06f5 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -108,7 +108,7 @@ PPPMKokkos::PPPMKokkos(LAMMPS *lmp) : PPPM(lmp) remap = nullptr; #if defined (LMP_KOKKOS_GPU) - #if defined(FFT_KOKKOS_KISSFFT) + #if defined(FFT_KOKKOS_KISS) if (comm->me == 0) error->warning(FLERR,"Using default KISS FFT with Kokkos GPU backends may give suboptimal performance"); #endif diff --git a/src/lmpfftsettings.h b/src/lmpfftsettings.h index b1a4cffa002..a5ed057d97a 100644 --- a/src/lmpfftsettings.h +++ b/src/lmpfftsettings.h @@ -62,7 +62,7 @@ #ifdef LMP_KOKKOS // with KOKKOS in CUDA or HIP mode we can only have -// CUFFT/HIPFFT or KISSFFT, thus undefine all other +// CUFFT/HIPFFT or KISS, thus undefine all other // FFTs here #ifdef KOKKOS_ENABLE_CUDA @@ -75,8 +75,8 @@ # if defined(FFT_KOKKOS_MKL) # undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISSFFT) -# define FFT_KOKKOS_KISSFFT +# if !defined(FFT_KOKKOS_CUFFT) && !defined(FFT_KOKKOS_KISS) +# define FFT_KOKKOS_KISS # endif #elif defined(KOKKOS_ENABLE_HIP) # if defined(FFT_KOKKOS_FFTW) @@ -88,8 +88,8 @@ # if defined(FFT_KOKKOS_MKL) # undef FFT_KOKKOS_MKL # endif -# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISSFFT) -# define FFT_KOKKOS_KISSFFT +# if !defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISS) +# define FFT_KOKKOS_KISS # endif #else # if defined(FFT_KOKKOS_CUFFT)