diff --git a/include/kernels/field_strength_tensor.cuh b/include/kernels/field_strength_tensor.cuh index 36bec430ba..b472529a58 100644 --- a/include/kernels/field_strength_tensor.cuh +++ b/include/kernels/field_strength_tensor.cuh @@ -169,7 +169,7 @@ namespace quda // 3*18 + 12*198 = 54 + 2376 = 2430 { F -= conj(F); // 18 real subtractions + one matrix conjugation - F *= static_cast(0.125); // 18 real multiplications + F *= static_cast(0.125); // 18 real multiplications // 36 floating point operations here } diff --git a/include/targets/cuda/shared_memory_helper.h b/include/targets/cuda/shared_memory_helper.h index 25ae718513..af276d635f 100644 --- a/include/targets/cuda/shared_memory_helper.h +++ b/include/targets/cuda/shared_memory_helper.h @@ -72,7 +72,7 @@ namespace quda /** @brief Constructor for SharedMemory object. */ - HostDevice constexpr SharedMemory() : data(cache(get_offset(target::block_dim()))) { } + constexpr SharedMemory() : data(cache(get_offset(target::block_dim()))) { } /** @brief Constructor for SharedMemory object. @@ -85,7 +85,7 @@ namespace quda /** @brief Return this SharedMemory object. */ - HostDevice constexpr auto sharedMem() const { return *this; } + constexpr auto sharedMem() const { return *this; } /** @brief Subscripting operator returning a reference to element. diff --git a/include/targets/hip/shared_memory_helper.h b/include/targets/hip/shared_memory_helper.h index 032ddc007a..5f74a595cd 100644 --- a/include/targets/hip/shared_memory_helper.h +++ b/include/targets/hip/shared_memory_helper.h @@ -57,11 +57,10 @@ namespace quda /** @brief Byte offset for this shared memory object. */ - template - static constexpr unsigned int get_offset(dim3 block, Arg &...arg) + static constexpr unsigned int get_offset(dim3 block) { unsigned int o = 0; - if constexpr (!std::is_same_v) { o = O::shared_mem_size(block, arg...); } + if constexpr (!std::is_same_v) { o = O::shared_mem_size(block); } return o; } @@ -73,7 +72,7 @@ namespace quda /** @brief Constructor for SharedMemory object. */ - HostDevice constexpr SharedMemory() : data(cache(get_offset(target::block_dim()))) { } + __device__ __host__ constexpr SharedMemory() : data(cache(get_offset(target::block_dim()))) { } /** @brief Constructor for SharedMemory object. @@ -86,7 +85,7 @@ namespace quda /** @brief Return this SharedMemory object. */ - constexpr auto sharedMem() const { return *this; } + __device__ __host__ constexpr auto sharedMem() const { return *this; } /** @brief Subscripting operator returning a reference to element. diff --git a/include/targets/hip/tunable_kernel.h b/include/targets/hip/tunable_kernel.h index 028ebdca29..6154060bdb 100644 --- a/include/targets/hip/tunable_kernel.h +++ b/include/targets/hip/tunable_kernel.h @@ -7,7 +7,6 @@ #include #include #include -#include namespace quda {