Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DFT] Introduce the cuFFT backend for the DFT interface. #284

Merged
merged 28 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
004b2d4
[DFT] Rearrange DFT compute tests so unimplemented always skips (#311)
FMarno May 9, 2023
6b76512
Initial cuFFT integration
FMarno Feb 6, 2023
1f1bf61
throw from host task directly
FMarno Feb 13, 2023
ca49074
remove detail namespace where possible
FMarno Feb 13, 2023
12630a0
format
FMarno Feb 22, 2023
719b0e1
update after rebase
FMarno Feb 22, 2023
1c53bfd
style change
FMarno Feb 22, 2023
12d2e9f
Implemented all cufft execution functions
FMarno Feb 22, 2023
4860fdd
Increase the relative error margin so cufft backend passes tests
FMarno Mar 1, 2023
121f554
Fix swapped input and output strides
FMarno Mar 2, 2023
46b51b3
fix compile-time tests for cufft
FMarno Mar 10, 2023
2110c2e
fix macro typo
FMarno Mar 10, 2023
3128522
fix non cuda build and increase test accuracy error margin
FMarno Mar 13, 2023
a9d8154
update README
FMarno Mar 14, 2023
745f332
format with clang-format-10
FMarno Mar 27, 2023
0d006c4
enable recommit in cuda backend
FMarno Mar 30, 2023
14dda42
change cuda context after call to cufftDestroy
FMarno Apr 5, 2023
00ff378
update dft example cmake
FMarno Apr 5, 2023
e70a4a6
update example readme
FMarno Apr 5, 2023
8df0fee
typo in ENABLE_CUFFT_BACKEND description
FMarno Apr 5, 2023
4f63869
Update help text for the various backends
FMarno Apr 19, 2023
87cdf0d
use the correct copyright headers
FMarno Apr 19, 2023
5e13967
Fix cmake comment
FMarno Apr 19, 2023
d4465d8
fix binary name in example
FMarno Apr 19, 2023
bd503eb
Add an exception for when the user tries to scale with cufft
FMarno Apr 19, 2023
0d7fdd5
fix warnings
FMarno Apr 20, 2023
4b1d0b1
removed forward_scale in runtime example for cufft
FMarno Apr 20, 2023
82639ca
avoid creating plans with invalid strides
FMarno Apr 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 23 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,29 @@ endif()
option(BUILD_SHARED_LIBS "Build dynamic libraries" ON)

## Backends
option(ENABLE_MKLCPU_BACKEND "" ON)
option(ENABLE_MKLGPU_BACKEND "" ON)
option(ENABLE_MKLCPU_BACKEND "Enable the Intel oneMKL CPU backend for supported interfaces" ON)
option(ENABLE_MKLGPU_BACKEND "Enable the Intel oneMKL GPU backend for supported interfaces" ON)
if(ENABLE_MKLCPU_BACKEND)
option(ENABLE_MKLCPU_THREAD_TBB "" ON)
option(ENABLE_MKLCPU_THREAD_TBB "Enable the use of Intel TBB with the oneMKL CPU backend" ON)
endif()
option(ENABLE_CUBLAS_BACKEND "" OFF)

option(ENABLE_CUSOLVER_BACKEND "" OFF)
# blas
option(ENABLE_CUBLAS_BACKEND "Enable the cuBLAS backend for the BLAS interface" OFF)
option(ENABLE_ROCBLAS_BACKEND "Enable the rocBLAS backend for the BLAS interface" OFF)
option(ENABLE_NETLIB_BACKEND "Enable the Netlib backend for the BLAS interface" OFF)

# rand
option(ENABLE_CURAND_BACKEND "Enable the cuRAND backend for the RNG interface" OFF)
option(ENABLE_ROCRAND_BACKEND "Enable the rocRAND backend for the RNG interface" OFF)

# lapack
option(ENABLE_CUSOLVER_BACKEND "Enable the cuSOLVER backend for the LAPACK interface" OFF)
option(ENABLE_ROCSOLVER_BACKEND "Enable the rocSOLVER backend for the LAPACK interface" OFF)

# dft
option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF)


option(ENABLE_ROCBLAS_BACKEND "" OFF)
option(ENABLE_CURAND_BACKEND "" OFF)
option(ENABLE_ROCRAND_BACKEND "" OFF)
option(ENABLE_ROCSOLVER_BACKEND "" OFF)
option(ENABLE_NETLIB_BACKEND "" OFF)
set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")

Expand Down Expand Up @@ -89,7 +98,8 @@ if(ENABLE_MKLCPU_BACKEND
list(APPEND DOMAINS_LIST "rng")
endif()
if(ENABLE_MKLGPU_BACKEND
OR ENABLE_MKLCPU_BACKEND)
OR ENABLE_MKLCPU_BACKEND
OR ENABLE_CUFFT_BACKEND)
list(APPEND DOMAINS_LIST "dft")
endif()

Expand All @@ -99,8 +109,8 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
string(REPLACE "\\" "/" CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
endif()
else()
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_ROCBLAS_BACKEND
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUFFT_BACKEND
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
set(CMAKE_CXX_COMPILER "clang++")
elseif(ENABLE_MKLGPU_BACKEND)
if(UNIX)
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ oneMKL is part of [oneAPI](https://oneapi.io).
<td align="center"><a href="https://developer.nvidia.com/curand"> NVIDIA cuRAND</a> for NVIDIA GPU </td>
<td align="center">NVIDIA GPU</td>
</tr>
<tr>
<td align="center"><a href="https://developer.nvidia.com/cufft"> NVIDIA cuFFT</a> for NVIDIA GPU </td>
<td align="center">NVIDIA GPU</td>
</tr>
<tr>
<td align="center"><a href="https://ww.netlib.org"> NETLIB LAPACK</a> for x86 CPU </td>
<td align="center">x86 CPU</td>
Expand Down Expand Up @@ -235,6 +239,12 @@ Supported domains: BLAS, LAPACK, RNG, DFT
<td align="center">Dynamic, Static</td>
<td align="center">DPC++</td>
</tr>
<tr>
<td align="center">NVIDIA GPU</td>
<td align="center">NVIDIA cuFFT</td>
<td align="center">Dynamic, Static</td>
<td align="center">DPC++</td>
</tr>
</tbody>
</table>

Expand Down
49 changes: 37 additions & 12 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ Random number generator example with uniform distribution ran OK on MKLCPU and C

## dft

Compile-time dispatching example with mklgpu backend
Compile-time dispatching example with MKLGPU backend

```none
$ SYCL_DEVICE_FILTER=gpu ./bin/example_dft_complex_fwd_buffer_mklgpu
Expand All @@ -380,29 +380,54 @@ Running with single precision real data type on:
DFT Complex USM example ran OK on MKLGPU
```

Runtime dispatching example with both mklgpu backend
Runtime dispatching example with both MKLGPU and cuFFT backend

```none
SYCL_DEVICE_FILTER=gpu ./bin/example_dft_complex_fwd_buffer_mklgpu
SYCL_DEVICE_FILTER=gpu ./bin/example_dft_real_fwd_usm

########################################################################
# Complex out-of-place forward transform for Buffer API's example:
# DFTI complex in-place forward transform with USM API example:
lhuot marked this conversation as resolved.
Show resolved Hide resolved
#
# Using APIs:
# Compile-time dispatch API
# Buffer forward complex out-of-place
# USM forward complex in-place
# Run-time dispatch
#
# Using single precision (float) data type
#
# For Intel GPU with Intel MKLGPU backend.
# Device will be selected during runtime.
# The environment variable SYCL_DEVICE_FILTER can be used to specify
# SYCL device
#
########################################################################

Running DFT complex forward example on GPU device
Device name is: Intel(R) UHD Graphics 750 [0x4c8a]
Running with single precision real data type:
DFT example run_time dispatch
DFT example ran OK
```

```none
SYCL_DEVICE_FILTER=gpu ./bin/example_dft_real_fwd_usm

########################################################################
# DFTI complex in-place forward transform with USM API example:
#
# Using APIs:
# USM forward complex in-place
# Run-time dispatch
#
# Using single precision (float) data type
#
# Device will be selected during runtime.
# The environment variable SYCL_DEVICE_FILTER can be used to specify
# SYCL device
#
########################################################################

Running DFT Complex forward out-of-place buffer example
Using compile-time dispatch API with MKLGPU.
Running with single precision real data type on:
GPU device :Intel(R) UHD Graphics 750 [0x4c8a]
DFT Complex USM example ran OK on MKLGPU
Running DFT complex forward example on GPU device
Device name is: NVIDIA A100-PCIE-40GB
Running with single precision real data type:
DFT example run_time dispatch
DFT example ran OK
```
4 changes: 2 additions & 2 deletions examples/dft/run_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

# Build object from all example sources
set(DFT_RT_SOURCES "")
if(ENABLE_MKLGPU_BACKEND)
if(ENABLE_MKLGPU_BACKEND OR ENABLE_CUFFT_BACKEND)
list(APPEND DFT_RT_SOURCES "real_fwd_usm")
endif()

Expand All @@ -31,7 +31,7 @@ include(WarningsUtils)
# If users build more than one backend (i.e. mklcpu and mklgpu, or mklcpu and CUDA), they may need to
# overwrite SYCL_DEVICE_FILTER in their environment to run on the desired backend
set(DEVICE_FILTERS "")
if(ENABLE_MKLGPU_BACKEND)
if(ENABLE_MKLGPU_BACKEND OR ENABLE_CUFFT_BACKEND)
list(APPEND DEVICE_FILTERS "gpu")
endif()

Expand Down
1 change: 0 additions & 1 deletion examples/dft/run_time_dispatching/real_fwd_usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ void run_example(const sycl::device& dev) {
desc(static_cast<std::int64_t>(N));

// 2. variadic set_value
desc.set_value(oneapi::mkl::dft::config_param::FORWARD_SCALE, 1.f / static_cast<float>(N));
desc.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS,
static_cast<std::int64_t>(1));
desc.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
Expand Down
12 changes: 7 additions & 5 deletions include/oneapi/mkl/detail/backends.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,19 @@ enum class backend {
netlib,
rocblas,
rocrand,
cufft,
unsupported
};

typedef std::map<backend, std::string> backendmap;

static backendmap backend_map = {
{ backend::mklcpu, "mklcpu" }, { backend::mklgpu, "mklgpu" },
{ backend::cublas, "cublas" }, { backend::cusolver, "cusolver" },
{ backend::curand, "curand" }, { backend::netlib, "netlib" },
{ backend::rocblas, "rocblas" }, { backend::rocrand, "rocrand" },
{ backend::rocsolver, "rocsolver" }, { backend::unsupported, "unsupported" }
{ backend::mklcpu, "mklcpu" }, { backend::mklgpu, "mklgpu" },
{ backend::cublas, "cublas" }, { backend::cusolver, "cusolver" },
{ backend::curand, "curand" }, { backend::netlib, "netlib" },
{ backend::rocblas, "rocblas" }, { backend::rocrand, "rocrand" },
{ backend::rocsolver, "rocsolver" }, { backend::cufft, "cufft" },
{ backend::unsupported, "unsupported" }
};

} //namespace mkl
Expand Down
6 changes: 6 additions & 0 deletions include/oneapi/mkl/detail/backends_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
{
#ifdef ENABLE_MKLGPU_BACKEND
LIB_NAME("dft_mklgpu")
#endif
} },
{ device::nvidiagpu,
{
#ifdef ENABLE_CUFFT_BACKEND
LIB_NAME("dft_cufft")
#endif
} } } },

Expand Down
4 changes: 2 additions & 2 deletions include/oneapi/mkl/dft/detail/commit_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class dft_values;
template <precision prec, domain dom>
class commit_impl {
public:
commit_impl(sycl::queue queue, mkl::backend backend) : backend_(backend), queue_(queue) {}
commit_impl(sycl::queue queue, mkl::backend backend) : queue_(queue), backend_(backend) {}

// rule of three
commit_impl(const commit_impl& other) = delete;
Expand All @@ -60,8 +60,8 @@ class commit_impl {
virtual void commit(const dft_values<prec, dom>&) = 0;

private:
mkl::backend backend_;
sycl::queue queue_;
mkl::backend backend_;
};

} // namespace oneapi::mkl::dft::detail
Expand Down
49 changes: 49 additions & 0 deletions include/oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*******************************************************************************
* Copyright Codeplay Software Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions
* and limitations under the License.
*
*
* SPDX-License-Identifier: Apache-2.0
*******************************************************************************/

#ifndef _ONEMKL_DFT_CUFFT_HPP_
#define _ONEMKL_DFT_CUFFT_HPP_

#if __has_include(<sycl/sycl.hpp>)
#include <sycl/sycl.hpp>
#else
#include <CL/sycl.hpp>
#endif

#include "oneapi/mkl/detail/export.hpp"
#include "oneapi/mkl/dft/detail/types_impl.hpp"

namespace oneapi::mkl::dft {

namespace detail {
// Forward declarations
template <precision prec, domain dom>
class commit_impl;

template <precision prec, domain dom>
class descriptor;
} // namespace detail

namespace cufft {
#include "oneapi/mkl/dft/detail/dft_ct.hxx"
} // namespace cufft

} // namespace oneapi::mkl::dft

#endif // _ONEMKL_DFT_CUFFT_HPP_
4 changes: 4 additions & 0 deletions include/oneapi/mkl/dft/detail/descriptor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ class descriptor {
void commit(backend_selector<backend::mklgpu> selector);
#endif

#ifdef ENABLE_CUFFT_BACKEND
void commit(backend_selector<backend::cufft> selector);
#endif

const dft_values<prec, dom>& get_values() const noexcept {
return values_;
};
Expand Down
1 change: 1 addition & 0 deletions src/config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#cmakedefine ENABLE_CUBLAS_BACKEND
#cmakedefine ENABLE_CUSOLVER_BACKEND
#cmakedefine ENABLE_CUFFT_BACKEND
#cmakedefine ENABLE_ROCBLAS_BACKEND
#cmakedefine ENABLE_ROCRAND_BACKEND
#cmakedefine ENABLE_ROCSOLVER_BACKEND
Expand Down
4 changes: 4 additions & 0 deletions src/dft/backends/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@ endif()
if(ENABLE_MKLCPU_BACKEND)
add_subdirectory(mklcpu)
endif()

if(ENABLE_CUFFT_BACKEND)
add_subdirectory(cufft)
endif()
74 changes: 74 additions & 0 deletions src/dft/backends/cufft/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#===============================================================================
# Copyright Codeplay Software Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
#
#
# SPDX-License-Identifier: Apache-2.0
#===============================================================================

set(LIB_NAME onemkl_dft_cufft)
set(LIB_OBJ ${LIB_NAME}_obj)

find_package(CUDAToolkit REQUIRED)

add_library(${LIB_NAME})
add_library(${LIB_OBJ} OBJECT
descriptor.cpp
commit.cpp
forward.cpp
backward.cpp
compute_signature.cpp
$<$<BOOL:${BUILD_SHARED_LIBS}>: mkl_dft_cufft_wrappers.cpp>
)

target_include_directories(${LIB_OBJ}
PRIVATE ${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/src
${CMAKE_BINARY_DIR}/bin
${MKL_INCLUDE}
)

target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT} ${MKL_COPT})

target_link_libraries(${LIB_OBJ} PRIVATE CUDA::cufft CUDA::cuda_driver)

target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL ${MKL_LINK_SYCL})

set_target_properties(${LIB_OBJ} PROPERTIES
POSITION_INDEPENDENT_CODE ON
)
target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})

#Set oneMKL libraries as not transitive for dynamic
if(BUILD_SHARED_LIBS)
set_target_properties(${LIB_NAME} PROPERTIES
INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
)
endif()

# Add major version to the library
set_target_properties(${LIB_NAME} PROPERTIES
SOVERSION ${PROJECT_VERSION_MAJOR}
)

# Add dependencies rpath to the library
list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)

# Add the library to install package
install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
)
Loading