Skip to content

Commit

Permalink
[DFT] Introduce the cuFFT backend for the DFT interface. (#284)
Browse files Browse the repository at this point in the history
* [DFT] Rearrange DFT compute tests so unimplemented always skips (#311)

* rearrange tests so unimplemented always skips

* wait to wait_and_throw, detect skipped tests

* Initial cuFFT integration

Currently only has support for inplace complex-to-complex single precision transforms

* throw from host task directly

* remove detail namespace where possible

* format

* update after rebase

* style change

* Implemented all cufft execution functions

* Increase the relative error margin so cufft backend passes tests

* Fix swapped input and output strides

* fix compile-time tests for cufft

* fix macro typo

* fix non cuda build and increase test accuracy error margin

* update README

* format with clang-format-10

* enable recommit in cuda backend

* change cuda context after call to cufftDestroy

* update dft example cmake

* update example readme

* typo in ENABLE_CUFFT_BACKEND description

* Update help text for the various backends

* use the correct copyright headers

* Fix cmake comment

* fix binary name in example

* Add an exception for when the user tries to scale with cufft

* fix warnings

* removed forward_scale in runtime example for cufft

* avoid creating plans with invalid strides
  • Loading branch information
FMarno authored May 9, 2023
1 parent 52a4ccd commit 8155847
Show file tree
Hide file tree
Showing 28 changed files with 1,137 additions and 51 deletions.
36 changes: 23 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,29 @@ endif()
option(BUILD_SHARED_LIBS "Build dynamic libraries" ON)

## Backends
option(ENABLE_MKLCPU_BACKEND "" ON)
option(ENABLE_MKLGPU_BACKEND "" ON)
option(ENABLE_MKLCPU_BACKEND "Enable the Intel oneMKL CPU backend for supported interfaces" ON)
option(ENABLE_MKLGPU_BACKEND "Enable the Intel oneMKL GPU backend for supported interfaces" ON)
if(ENABLE_MKLCPU_BACKEND)
option(ENABLE_MKLCPU_THREAD_TBB "" ON)
option(ENABLE_MKLCPU_THREAD_TBB "Enable the use of Intel TBB with the oneMKL CPU backend" ON)
endif()
option(ENABLE_CUBLAS_BACKEND "" OFF)

option(ENABLE_CUSOLVER_BACKEND "" OFF)
# blas
option(ENABLE_CUBLAS_BACKEND "Enable the cuBLAS backend for the BLAS interface" OFF)
option(ENABLE_ROCBLAS_BACKEND "Enable the rocBLAS backend for the BLAS interface" OFF)
option(ENABLE_NETLIB_BACKEND "Enable the Netlib backend for the BLAS interface" OFF)

# rand
option(ENABLE_CURAND_BACKEND "Enable the cuRAND backend for the RNG interface" OFF)
option(ENABLE_ROCRAND_BACKEND "Enable the rocRAND backend for the RNG interface" OFF)

# lapack
option(ENABLE_CUSOLVER_BACKEND "Enable the cuSOLVER backend for the LAPACK interface" OFF)
option(ENABLE_ROCSOLVER_BACKEND "Enable the rocSOLVER backend for the LAPACK interface" OFF)

# dft
option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF)


option(ENABLE_ROCBLAS_BACKEND "" OFF)
option(ENABLE_CURAND_BACKEND "" OFF)
option(ENABLE_ROCRAND_BACKEND "" OFF)
option(ENABLE_ROCSOLVER_BACKEND "" OFF)
option(ENABLE_NETLIB_BACKEND "" OFF)
set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")

Expand Down Expand Up @@ -89,7 +98,8 @@ if(ENABLE_MKLCPU_BACKEND
list(APPEND DOMAINS_LIST "rng")
endif()
if(ENABLE_MKLGPU_BACKEND
OR ENABLE_MKLCPU_BACKEND)
OR ENABLE_MKLCPU_BACKEND
OR ENABLE_CUFFT_BACKEND)
list(APPEND DOMAINS_LIST "dft")
endif()

Expand All @@ -99,8 +109,8 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
string(REPLACE "\\" "/" CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
endif()
else()
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_ROCBLAS_BACKEND
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUFFT_BACKEND
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
set(CMAKE_CXX_COMPILER "clang++")
elseif(ENABLE_MKLGPU_BACKEND)
if(UNIX)
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ oneMKL is part of [oneAPI](https://oneapi.io).
<td align="center"><a href="https://developer.nvidia.com/curand"> NVIDIA cuRAND</a> for NVIDIA GPU </td>
<td align="center">NVIDIA GPU</td>
</tr>
<tr>
<td align="center"><a href="https://developer.nvidia.com/cufft"> NVIDIA cuFFT</a> for NVIDIA GPU </td>
<td align="center">NVIDIA GPU</td>
</tr>
<tr>
<td align="center"><a href="https://ww.netlib.org"> NETLIB LAPACK</a> for x86 CPU </td>
<td align="center">x86 CPU</td>
Expand Down Expand Up @@ -235,6 +239,12 @@ Supported domains: BLAS, LAPACK, RNG, DFT
<td align="center">Dynamic, Static</td>
<td align="center">DPC++</td>
</tr>
<tr>
<td align="center">NVIDIA GPU</td>
<td align="center">NVIDIA cuFFT</td>
<td align="center">Dynamic, Static</td>
<td align="center">DPC++</td>
</tr>
</tbody>
</table>

Expand Down
49 changes: 37 additions & 12 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ Random number generator example with uniform distribution ran OK on MKLCPU and C

## dft

Compile-time dispatching example with mklgpu backend
Compile-time dispatching example with MKLGPU backend

```none
$ SYCL_DEVICE_FILTER=gpu ./bin/example_dft_complex_fwd_buffer_mklgpu
Expand All @@ -380,29 +380,54 @@ Running with single precision real data type on:
DFT Complex USM example ran OK on MKLGPU
```

Runtime dispatching example with both mklgpu backend
Runtime dispatching example with both MKLGPU and cuFFT backend

```none
SYCL_DEVICE_FILTER=gpu ./bin/example_dft_complex_fwd_buffer_mklgpu
SYCL_DEVICE_FILTER=gpu ./bin/example_dft_real_fwd_usm
########################################################################
# Complex out-of-place forward transform for Buffer API's example:
# DFTI complex in-place forward transform with USM API example:
#
# Using APIs:
# Compile-time dispatch API
# Buffer forward complex out-of-place
# USM forward complex in-place
# Run-time dispatch
#
# Using single precision (float) data type
#
# For Intel GPU with Intel MKLGPU backend.
# Device will be selected during runtime.
# The environment variable SYCL_DEVICE_FILTER can be used to specify
# SYCL device
#
########################################################################
Running DFT complex forward example on GPU device
Device name is: Intel(R) UHD Graphics 750 [0x4c8a]
Running with single precision real data type:
DFT example run_time dispatch
DFT example ran OK
```

```none
SYCL_DEVICE_FILTER=gpu ./bin/example_dft_real_fwd_usm
########################################################################
# DFTI complex in-place forward transform with USM API example:
#
# Using APIs:
# USM forward complex in-place
# Run-time dispatch
#
# Using single precision (float) data type
#
# Device will be selected during runtime.
# The environment variable SYCL_DEVICE_FILTER can be used to specify
# SYCL device
#
########################################################################
Running DFT Complex forward out-of-place buffer example
Using compile-time dispatch API with MKLGPU.
Running with single precision real data type on:
GPU device :Intel(R) UHD Graphics 750 [0x4c8a]
DFT Complex USM example ran OK on MKLGPU
Running DFT complex forward example on GPU device
Device name is: NVIDIA A100-PCIE-40GB
Running with single precision real data type:
DFT example run_time dispatch
DFT example ran OK
```
4 changes: 2 additions & 2 deletions examples/dft/run_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

# Build object from all example sources
set(DFT_RT_SOURCES "")
if(ENABLE_MKLGPU_BACKEND)
if(ENABLE_MKLGPU_BACKEND OR ENABLE_CUFFT_BACKEND)
list(APPEND DFT_RT_SOURCES "real_fwd_usm")
endif()

Expand All @@ -31,7 +31,7 @@ include(WarningsUtils)
# If users build more than one backend (i.e. mklcpu and mklgpu, or mklcpu and CUDA), they may need to
# overwrite SYCL_DEVICE_FILTER in their environment to run on the desired backend
set(DEVICE_FILTERS "")
if(ENABLE_MKLGPU_BACKEND)
if(ENABLE_MKLGPU_BACKEND OR ENABLE_CUFFT_BACKEND)
list(APPEND DEVICE_FILTERS "gpu")
endif()

Expand Down
1 change: 0 additions & 1 deletion examples/dft/run_time_dispatching/real_fwd_usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ void run_example(const sycl::device& dev) {
desc(static_cast<std::int64_t>(N));

// 2. variadic set_value
desc.set_value(oneapi::mkl::dft::config_param::FORWARD_SCALE, 1.f / static_cast<float>(N));
desc.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS,
static_cast<std::int64_t>(1));
desc.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
Expand Down
12 changes: 7 additions & 5 deletions include/oneapi/mkl/detail/backends.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,19 @@ enum class backend {
netlib,
rocblas,
rocrand,
cufft,
unsupported
};

typedef std::map<backend, std::string> backendmap;

static backendmap backend_map = {
{ backend::mklcpu, "mklcpu" }, { backend::mklgpu, "mklgpu" },
{ backend::cublas, "cublas" }, { backend::cusolver, "cusolver" },
{ backend::curand, "curand" }, { backend::netlib, "netlib" },
{ backend::rocblas, "rocblas" }, { backend::rocrand, "rocrand" },
{ backend::rocsolver, "rocsolver" }, { backend::unsupported, "unsupported" }
{ backend::mklcpu, "mklcpu" }, { backend::mklgpu, "mklgpu" },
{ backend::cublas, "cublas" }, { backend::cusolver, "cusolver" },
{ backend::curand, "curand" }, { backend::netlib, "netlib" },
{ backend::rocblas, "rocblas" }, { backend::rocrand, "rocrand" },
{ backend::rocsolver, "rocsolver" }, { backend::cufft, "cufft" },
{ backend::unsupported, "unsupported" }
};

} //namespace mkl
Expand Down
6 changes: 6 additions & 0 deletions include/oneapi/mkl/detail/backends_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
{
#ifdef ENABLE_MKLGPU_BACKEND
LIB_NAME("dft_mklgpu")
#endif
} },
{ device::nvidiagpu,
{
#ifdef ENABLE_CUFFT_BACKEND
LIB_NAME("dft_cufft")
#endif
} } } },

Expand Down
4 changes: 2 additions & 2 deletions include/oneapi/mkl/dft/detail/commit_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class dft_values;
template <precision prec, domain dom>
class commit_impl {
public:
commit_impl(sycl::queue queue, mkl::backend backend) : backend_(backend), queue_(queue) {}
commit_impl(sycl::queue queue, mkl::backend backend) : queue_(queue), backend_(backend) {}

// rule of three
commit_impl(const commit_impl& other) = delete;
Expand All @@ -60,8 +60,8 @@ class commit_impl {
virtual void commit(const dft_values<prec, dom>&) = 0;

private:
mkl::backend backend_;
sycl::queue queue_;
mkl::backend backend_;
};

} // namespace oneapi::mkl::dft::detail
Expand Down
49 changes: 49 additions & 0 deletions include/oneapi/mkl/dft/detail/cufft/onemkl_dft_cufft.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*******************************************************************************
* Copyright Codeplay Software Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions
* and limitations under the License.
*
*
* SPDX-License-Identifier: Apache-2.0
*******************************************************************************/

#ifndef _ONEMKL_DFT_CUFFT_HPP_
#define _ONEMKL_DFT_CUFFT_HPP_

#if __has_include(<sycl/sycl.hpp>)
#include <sycl/sycl.hpp>
#else
#include <CL/sycl.hpp>
#endif

#include "oneapi/mkl/detail/export.hpp"
#include "oneapi/mkl/dft/detail/types_impl.hpp"

namespace oneapi::mkl::dft {

namespace detail {
// Forward declarations
template <precision prec, domain dom>
class commit_impl;

template <precision prec, domain dom>
class descriptor;
} // namespace detail

namespace cufft {
#include "oneapi/mkl/dft/detail/dft_ct.hxx"
} // namespace cufft

} // namespace oneapi::mkl::dft

#endif // _ONEMKL_DFT_CUFFT_HPP_
4 changes: 4 additions & 0 deletions include/oneapi/mkl/dft/detail/descriptor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ class descriptor {
void commit(backend_selector<backend::mklgpu> selector);
#endif

#ifdef ENABLE_CUFFT_BACKEND
void commit(backend_selector<backend::cufft> selector);
#endif

const dft_values<prec, dom>& get_values() const noexcept {
return values_;
};
Expand Down
1 change: 1 addition & 0 deletions src/config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#cmakedefine ENABLE_CUBLAS_BACKEND
#cmakedefine ENABLE_CUSOLVER_BACKEND
#cmakedefine ENABLE_CUFFT_BACKEND
#cmakedefine ENABLE_ROCBLAS_BACKEND
#cmakedefine ENABLE_ROCRAND_BACKEND
#cmakedefine ENABLE_ROCSOLVER_BACKEND
Expand Down
4 changes: 4 additions & 0 deletions src/dft/backends/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@ endif()
if(ENABLE_MKLCPU_BACKEND)
add_subdirectory(mklcpu)
endif()

if(ENABLE_CUFFT_BACKEND)
add_subdirectory(cufft)
endif()
74 changes: 74 additions & 0 deletions src/dft/backends/cufft/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#===============================================================================
# Copyright Codeplay Software Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
#
#
# SPDX-License-Identifier: Apache-2.0
#===============================================================================

set(LIB_NAME onemkl_dft_cufft)
set(LIB_OBJ ${LIB_NAME}_obj)

find_package(CUDAToolkit REQUIRED)

add_library(${LIB_NAME})
add_library(${LIB_OBJ} OBJECT
descriptor.cpp
commit.cpp
forward.cpp
backward.cpp
compute_signature.cpp
$<$<BOOL:${BUILD_SHARED_LIBS}>: mkl_dft_cufft_wrappers.cpp>
)

target_include_directories(${LIB_OBJ}
PRIVATE ${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/src
${CMAKE_BINARY_DIR}/bin
${MKL_INCLUDE}
)

target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT} ${MKL_COPT})

target_link_libraries(${LIB_OBJ} PRIVATE CUDA::cufft CUDA::cuda_driver)

target_link_libraries(${LIB_OBJ} PUBLIC ONEMKL::SYCL::SYCL ${MKL_LINK_SYCL})

set_target_properties(${LIB_OBJ} PROPERTIES
POSITION_INDEPENDENT_CODE ON
)
target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ})

#Set oneMKL libraries as not transitive for dynamic
if(BUILD_SHARED_LIBS)
set_target_properties(${LIB_NAME} PROPERTIES
INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL
)
endif()

# Add major version to the library
set_target_properties(${LIB_NAME} PROPERTIES
SOVERSION ${PROJECT_VERSION_MAJOR}
)

# Add dependencies rpath to the library
list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)

# Add the library to install package
install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets)
install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
)
Loading

0 comments on commit 8155847

Please sign in to comment.