ROCm · dgaliffiAMD · Sep 17, 2024 · Aug 14, 2024 · Aug 15, 2024 · Aug 15, 2024
@@ -39,7 +39,7 @@ variables:
   HIP_FLAGS: "-Wno-unused-command-line-argument -Wall -Wextra -Werror"
   # Keep in sync with ROCM_VERSION in Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile
   # and Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile
-  DOCKER_ROCM_VERSION: 6.1.0
+  DOCKER_ROCM_VERSION: 6.2.0
   DOCKER_HIP_LIBRARIES_ROCM_TAG: rocm-ubuntu-${DOCKER_ROCM_VERSION}
   DOCKER_HIP_LIBRARIES_CUDA_TAG: cuda-ubuntu-${DOCKER_ROCM_VERSION}
   DOCKER_HIP_LIBRARIES_ROCM: $DOCKER_TAG_PREFIX:$DOCKER_HIP_LIBRARIES_ROCM_TAG

@@ -1,6 +1,6 @@
 // MIT License
 //
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal

@@ -2,11 +2,11 @@
 # Above is required for substitutions in environment variables
 
 # CUDA based docker image
-FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
+FROM nvidia/cuda:12.6.0-devel-ubuntu22.04
 
 # The ROCm version that this image is based on.
 # Always write this down as major.minor.patch
-ENV ROCM_VERSION=6.1.0
+ENV ROCM_VERSION=6.2.0
 ENV ROCM_VERSION_APT=${ROCM_VERSION%.0}
 
 # Base packages that are required for the installation
@@ -53,10 +53,14 @@ RUN echo "/opt/rocm/lib" >> /etc/ld.so.conf.d/rocm.conf \
 ENV HIP_COMPILER=nvcc HIP_PLATFORM=nvidia HIP_RUNTIME=cuda
 
 # Install rocRAND
+# We need to apply this patch to make it work on Nvidia for ROCm 6.2: https://github.com/ROCm/rocRAND/commit/7ec5fda5243e599d83af841b5c38198a2f7f05fa
 RUN wget https://github.com/ROCm/rocRAND/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O rocrand.tar.gz \
     && mkdir rocrand \
     && tar -xf ./rocrand.tar.gz --strip-components 1 -C rocrand \
     && rm ./rocrand.tar.gz \
+    && wget https://github.com/ROCm/rocRAND/commit/7ec5fda5243e599d83af841b5c38198a2f7f05fa.patch -O rocrand.patch \
+    && patch -p1 -d rocrand < ./rocrand.patch \
+    && rm rocrand.patch \
     && cmake -S ./rocrand -B ./rocrand/build \
         -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
         -D BUILD_HIPRAND=OFF \
@@ -89,26 +93,22 @@ RUN wget https://github.com/ROCm/hipBLAS/archive/refs/tags/rocm-${ROCM_VERSION}.
     && rm -rf ./hipblas
 
 # Install hipSOLVER
-# hipSOLVER cmake for rocm-6.1.0 is broken added CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ as fix
 RUN wget https://github.com/ROCm/hipSOLVER/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hipsolver.tar.gz \
     && mkdir hipsolver \
     && tar -xf ./hipsolver.tar.gz --strip-components 1 -C hipsolver \
     && rm ./hipsolver.tar.gz \
-    && CXXFLAGS=-D__HIP_PLATFORM_NVIDIA__ cmake -S ./hipsolver -B ./hipsolver/build \
+    && cmake -S ./hipsolver -B ./hipsolver/build \
         -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
         -D CMAKE_INSTALL_PREFIX=/opt/rocm \
         -D USE_CUDA=ON \
     && cmake --build ./hipsolver/build --target install \
     && rm -rf ./hipsolver
 
 # Install hipRAND
-# Manually replace usage of __HIP_PLATFORM_NVCC__ with __HIP_PLATFORM_NVIDIA__. See
-# https://github.com/ROCm/hipRAND/commit/4925f0da96fad5b9f532ddc79f1f52fc279d329f
 RUN wget https://github.com/ROCm/hipRAND/archive/refs/tags/rocm-${ROCM_VERSION}.tar.gz -O hiprand.tar.gz \
     && mkdir hiprand \
     && tar -xf ./hiprand.tar.gz --strip-components 1 -C hiprand \
     && rm ./hiprand.tar.gz \
-    && sed -i s/__HIP_PLATFORM_NVCC__/__HIP_PLATFORM_NVIDIA__/ ./hiprand/library/include/hiprand/hiprand.h \
     && cmake -S ./hiprand -B ./hiprand/build \
         -D CMAKE_MODULE_PATH=/opt/rocm/lib/cmake/hip \
         -D CMAKE_INSTALL_PREFIX=/opt/rocm \

@@ -6,7 +6,7 @@ FROM ubuntu:22.04
 
 # The ROCm version that this image is based on.
 # Always write this down as major.minor.patch
-ENV ROCM_VERSION=6.1.0
+ENV ROCM_VERSION=6.2.0
 ENV ROCM_VERSION_APT=${ROCM_VERSION%.0}
 
 # Base packages that are required for the installation

@@ -56,9 +56,7 @@ void fft_example(const int dimension, const int size = 4, const int direction =
     std::uniform_real_distribution<double> distribution{};
     std::generate(input.begin(),
                   input.end(),
-                  [&]() {
-                      return input_t{distribution(generator), distribution(generator)};
-                  });
+                  [&]() { return input_t{distribution(generator), distribution(generator)}; });
 
     std::cout << "Input:\n" << std::setprecision(3);
     print_nd_data(input, n, 16);

@@ -91,13 +91,16 @@ int main(int argc, char* argv[])
 
     // Define infield geometry
     // Last entry of each upper bound is the batch size
+    const size_t              batch_size     = 1;
     const std::vector<size_t> inbrick0_lower = {0, 0, 0, 0};
-    const std::vector<size_t> inbrick0_upper = {1, length[0] / deviceCount, length[1], length[2]};
-    const std::vector<size_t> inbrick1_lower = {0, length[0] / deviceCount, 0, 0};
-    const std::vector<size_t> inbrick1_upper = {1, length[0], length[1], length[2]};
+    const std::vector<size_t> inbrick0_upper
+        = {length[0] / deviceCount, length[1], length[2], batch_size};
+    const std::vector<size_t> inbrick1_lower = {length[0] / deviceCount, 0, 0, 0};
+    const std::vector<size_t> inbrick1_upper = {length[0], length[1], length[2], batch_size};
 
     // Row-major stride for brick data layout in memory
-    std::vector<size_t> brick_stride = {fftSize, length[0] * length[1], length[0], 1};
+    const size_t        idist        = fftSize; // distance between batches
+    std::vector<size_t> brick_stride = {1, length[0] * length[1], length[0], idist};
 
     rocfft_field infield = nullptr;
     ROCFFT_CHECK(rocfft_field_create(&infield));
@@ -145,9 +148,9 @@ int main(int argc, char* argv[])
 
     std::vector<void*>        gpu_out(2);
     const std::vector<size_t> outbrick0_lower = {0, 0, 0, 0};
-    const std::vector<size_t> outbrick0_upper = {1, length[0] / deviceCount, length[1], length[2]};
-    const std::vector<size_t> outbrick1_lower = {0, length[0] / deviceCount, 0, 0};
-    const std::vector<size_t> outbrick1_upper = {1, length[0], length[1], length[2]};
+    const std::vector<size_t> outbrick0_upper = {length[0] / deviceCount, length[1], length[2], 1};
+    const std::vector<size_t> outbrick1_lower = {length[0] / deviceCount, 0, 0, 0};
+    const std::vector<size_t> outbrick1_upper = {length[0], length[1], length[2], 1};
 
     rocfft_brick outbrick0 = nullptr;
     ROCFFT_CHECK(rocfft_brick_create(&outbrick0,

@@ -1,6 +1,6 @@
 // MIT License
 //
-// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
@@ -253,8 +253,7 @@ int main(const int argc, const char** argv)
 
     if(number_of_devies <= 0)
     {
-        std::cerr << "HIP supported devices not found!"
-                  << "\n";
+        std::cerr << "HIP supported devices not found!\n";
         exit(error_exit_code);
     }