Skip to content

Commit

Permalink
OpenACC + Cray CCE + AMD MI200+ (#368)
Browse files Browse the repository at this point in the history
Co-authored-by: Henry Le Berre <[email protected]>
Co-authored-by: Steve Abbott <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: wilfonba <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Steve Abbott <[email protected]>
Co-authored-by: Abbott, Stephen R <[email protected]>
Co-authored-by: Spencer Bryngelson <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Ben Wilfong <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand <[email protected]>
Co-authored-by: Anand Radhakrishnan <[email protected]>
Co-authored-by: Anand <[email protected]>
Co-authored-by: Spencer Bryngelson <[email protected]>
  • Loading branch information
32 people authored Apr 6, 2024
1 parent ddb3edd commit 2b3d35d
Show file tree
Hide file tree
Showing 53 changed files with 2,064 additions and 1,087 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/frontier/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

# CI build step for the Frontier runner: source the MFC environment loader into
# this shell, then build MFC with the --gpu flag.
# NOTE(review): "-c f -m g" presumably selects the Frontier system in GPU mode,
# and "-j 8" presumably sets 8 parallel build jobs - confirm against mfc.sh.
. ./mfc.sh load -c f -m g
./mfc.sh build -j 8 --gpu
43 changes: 43 additions & 0 deletions .github/workflows/frontier/submit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash

# Submit a CI script to Slurm via sbatch and wait for the job to finish.
#
# Usage: submit.sh <script.sh> <cpu|gpu>
#   $1  Path of the script whose contents are embedded in the batch job.
#   $2  Device label; appended to the job name / log-file name and made
#       available inside the job as the shell variable job_device.
#
# Job output (stdout and stderr) goes to <job_slug>.out.

set -e

usage() {
    echo "Usage: $0 [script.sh] [cpu|gpu]"
}

# Refuse to submit when no script was given.
if [ -z "$1" ]; then
    usage
    exit 1
fi

# The path is quoted so that file names containing spaces or glob characters
# are read intact; with `set -e` a failing cat aborts the submission.
sbatch_script_contents=$(cat "$1")

# Script basename without the ".sh" suffix, every non-alphanumeric character
# mapped to "-", followed by the device label.
job_slug="$(basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g')-$2"

# The heredoc delimiter is deliberately unquoted: unescaped expansions
# ($job_slug, $2, $(pwd), $sbatch_script_contents) are substituted now, at
# submission time, while \$SLURM_SUBMIT_DIR is escaped so that it is resolved
# inside the running job.
sbatch <<EOT
#!/bin/bash
#SBATCH -JMFC-$job_slug # Job name
#SBATCH -A CFD154 # charge account
#SBATCH -N 1 # Number of nodes required
#SBATCH -n 8 # Number of cores required
#SBATCH -t 02:00:00 # Duration of the job (Ex: 15 mins)
#SBATCH -q debug # QOS Name
#SBATCH -o$job_slug.out # Combined output and error messages file
#SBATCH -W # Do not exit until the submitted job terminates.
set -e
set -x
cd "\$SLURM_SUBMIT_DIR"
echo "Running in $(pwd):"
job_slug="$job_slug"
job_device="$2"
. ./mfc.sh load -c f -m g
$sbatch_script_contents
EOT

3 changes: 3 additions & 0 deletions .github/workflows/frontier/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# CI test step for Frontier: runs the MFC test suite through mfc.sh.
# NOTE(review): "-j 4" presumably sets the number of parallel jobs and the
# arguments after "--" ("-c frontier") are presumably forwarded to the
# underlying run command - confirm against `./mfc.sh test --help`.
./mfc.sh test -j 4 -a -- -c frontier
17 changes: 15 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,22 +99,35 @@ jobs:
run: sudo ./mfc.sh docker ./mfc.sh test -j $(nproc) -a

self:
name: Georgia Tech | Phoenix (NVHPC)
name: Self Hosted
if: github.repository == 'MFlowCode/MFC'
continue-on-error: true
strategy:
matrix:
device: ['cpu', 'gpu']
lbl: ['gt', 'frontier']
exclude:
- device: cpu
lbl: frontier
runs-on:
group: phoenix
labels: gt
labels: ${{ matrix.lbl }}
steps:
- name: Clone
uses: actions/checkout@v3

- name: Build & Test
if: matrix.lbl == 'gt'
run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}

- name: Build
if: matrix.lbl == 'frontier'
run: bash .github/workflows/frontier/build.sh

- name: Test
if: matrix.lbl == 'frontier'
run: bash .github/workflows/frontier/submit.sh .github/workflows/frontier/test.sh ${{matrix.device}}

- name: Print Logs
if: always()
run: cat test-${{ matrix.device }}.out
Expand Down
4 changes: 3 additions & 1 deletion .typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ AttributeIDSupressMenu = "AttributeIDSupressMenu"
INOUT = "INOUT"
WRONLY = "WRONLY"
nd = "nd"
iy = "iy"
HPE = "HPE"

[files]
extend-exclude = ["docs/documentation/references*", "tests/"]
extend-exclude = ["docs/documentation/references*", "tests/", "toolchain/cce_simulation_workgroup_256.sh"]
65 changes: 48 additions & 17 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ cmake_minimum_required(VERSION 3.20)
# We include C as a language because - for some reason -
# FIND_LIBRARY_USE_LIB64_PATHS is otherwise ignored.

project(MFC LANGUAGES C Fortran)

project(MFC LANGUAGES C CXX Fortran)

# Build options exposed to users and their default values.

Expand Down Expand Up @@ -109,7 +108,7 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_options(
-fcheck=all,no-array-temps
-fcheck=all,no-array-temps
-fbacktrace
-fimplicit-none
#-ffpe-trap=invalid,zero,denormal,overflow
Expand All @@ -125,11 +124,22 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
add_compile_options(
"SHELL:-h nomessage=296:878:1391:1069"
"SHELL:-h msgs" "SHELL:-h static" "SHELL:-h keepfiles"
"SHELL:-h static" "SHELL:-h keepfiles"
"SHELL:-h acc_model=auto_async_none"
"SHELL: -h acc_model=no_fast_addr"
"SHELL: -h list=adm" "-DCRAY_ACC_SIMPLIFY" "-DCRAY_ACC_WAR"
)

add_link_options("SHELL:-hkeepfiles")

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_options(-e D "SHELL:-h func_trace")
add_compile_options(
"SHELL:-h acc_model=auto_async_none"
"SHELL: -h acc_model=no_fast_addr"
"SHELL: -K trap=fp" "SHELL: -G2"

)
add_link_options("SHELL: -K trap=fp" "SHELL: -G2")
endif()
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Flang")
add_compile_options(
Expand Down Expand Up @@ -197,13 +207,14 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug")
endif()



# HANDLE_SOURCES: Given a target (herein <target>):
#
# * Locate all source files for <target> of the type
#
# src/[<target>,common]/[.,include]/*.[f90,fpp].
#
# * For each .fpp file found with filepath <dirpath>/<filename>.fpp, using a
#
# * For each .fpp file found with filepath <dirpath>/<filename>.fpp, using a
# custom command, instruct CMake how to generate a file with path
#
# src/<target>/fypp/<filename>.f90
Expand All @@ -224,7 +235,7 @@ endif()
# file is modified, but also when any file with filepath of the form
#
# src/[<target>,common]/include/*.fpp
#
#
# is modified. This is a reasonable compromise as modifications to .fpp files
# in the include directories will be rare - by design. Other approaches would
# have required a more complex CMakeLists.txt file (perhaps parsing the .fpp
Expand Down Expand Up @@ -333,11 +344,11 @@ function(MFC_SETUP_TARGET)

set_target_properties(${ARGS_TARGET} PROPERTIES Fortran_PREPROCESS ON)

target_include_directories(${ARGS_TARGET} PRIVATE
target_include_directories(${ARGS_TARGET} PRIVATE
"${CMAKE_SOURCE_DIR}/src/common"
"${CMAKE_SOURCE_DIR}/src/common/include"
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}")

if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
target_include_directories(${ARGS_TARGET} PRIVATE
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
Expand Down Expand Up @@ -367,9 +378,14 @@ function(MFC_SETUP_TARGET)
endif()

if (ARGS_FFTW)
if (MFC_OpenACC AND ARGS_OpenACC AND (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI"))
find_package(CUDAToolkit REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::cudart CUDA::cufft)
if (MFC_OpenACC AND ARGS_OpenACC)
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
find_package(CUDAToolkit REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::cudart CUDA::cufft)
else()
find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hipfft)
endif()
else()
find_package(FFTW REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE FFTW::FFTW)
Expand Down Expand Up @@ -420,6 +436,9 @@ function(MFC_SETUP_TARGET)
PRIVATE -gpu=autocompare,debug
)
endif()
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
endif()
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
target_compile_options(${ARGS_TARGET} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
Expand All @@ -438,19 +457,31 @@ if (MFC_PRE_PROCESS)
MFC_SETUP_TARGET(TARGET pre_process
SOURCES "${pre_process_SRCs}"
MPI)
if(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
target_compile_options(pre_process PRIVATE -hfp0)
endif()
endif()

# simulation target: configured through MFC_SETUP_TARGET with the MPI, OpenACC
# and FFTW options enabled.
if (MFC_SIMULATION)
    MFC_SETUP_TARGET(TARGET simulation
                     SOURCES "${simulation_SRCs}"
                     MPI OpenACC FFTW)

    # When building with the Cray compiler and OpenACC, run the CCE workgroup
    # script as a post-build step of `simulation`, passing it the build
    # directory. VERBATIM keeps argument escaping identical across generators
    # and platforms (e.g. for paths that contain spaces).
    if (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray" AND MFC_OpenACC)
        add_custom_command(TARGET simulation POST_BUILD
            COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/cce_simulation_workgroup_256.sh"
                    "${CMAKE_CURRENT_BINARY_DIR}"
            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
            COMMENT "Patching & Rebuilding with Cray hacks"
            VERBATIM
        )
    endif()
endif()

if (MFC_POST_PROCESS)
    MFC_SETUP_TARGET(
        TARGET  post_process
        SOURCES "${post_process_SRCs}"
        MPI
        SILO
        HDF5
        FFTW
    )

    # Optimization is turned off for this target in response to
    # https://github.com/MFlowCode/MFC-develop/issues/95
    target_compile_options(post_process PRIVATE -O0)
endif()
Expand All @@ -468,7 +499,7 @@ if (MFC_DOCUMENTATION)
add_custom_command(
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/docs/documentation/examples.md"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/docs/examples.sh;${examples_DOCs}"
COMMAND "bash" "${CMAKE_CURRENT_SOURCE_DIR}/docs/examples.sh"
COMMAND "bash" "${CMAKE_CURRENT_SOURCE_DIR}/docs/examples.sh"
"${CMAKE_CURRENT_SOURCE_DIR}"
COMMENT "Generating examples.md"
VERBATIM
Expand Down Expand Up @@ -500,7 +531,7 @@ if (MFC_DOCUMENTATION)
set(DOXYGEN_IMAGE_PATH "\"${CMAKE_CURRENT_SOURCE_DIR}/docs/res\"\
\"${CMAKE_CURRENT_SOURCE_DIR}/docs/${target}\"")

file(MAKE_DIRECTORY "${DOXYGEN_OUTPUT_DIRECTORY}")
file(MAKE_DIRECTORY "${DOXYGEN_OUTPUT_DIRECTORY}")

configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/docs/Doxyfile.in"
Expand Down Expand Up @@ -537,7 +568,7 @@ if (MFC_DOCUMENTATION)
endmacro()

add_custom_target(documentation)

find_package(Doxygen REQUIRED dot REQUIRED)

# > Fetch CSS Theme
Expand Down
2 changes: 1 addition & 1 deletion docs/Doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ INTERNAL_DOCS = YES
# names in lower-case letters. If set to YES, upper-case letters are also
# allowed. This is useful if you have classes or files whose names only differ
# in case and if your file system supports case sensitive file names. Windows
# (including Cygwin) ands Mac users are advised to set this option to NO.
# (including Cygwin) and Mac users are advised to set this option to NO.
# The default value is: system dependent.

CASE_SENSE_NAMES = YES
Expand Down
5 changes: 5 additions & 0 deletions examples/3D_weak_scaling/analyze.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

# Run this script from the 3D_weak_scaling case directory after running
# MFC with --omni -n <name>. To analyze, run chmod u+x ./analyze.sh followed
# by ./analyze.sh <name>

# Without a profile name the workload path would collapse to workloads//mi200,
# so stop early with a usage message instead.
if [ -z "$1" ]; then
    echo "Usage: $0 <name>" >&2
    exit 1
fi

# The path is quoted so that a profile name containing spaces stays one argument.
omniperf analyze -p "workloads/$1/mi200" --metric 0 7.1.5 7.1.6 7.1.7 7.1.8 7.1.9 16.3.1 16.3.2 16.3.7 17.3.2 17.3.3 17.3.8
23 changes: 23 additions & 0 deletions misc/profiling_amdgpu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Profile MFC using omniperf

0) Start an interactive session with the desired number of nodes and total tasks using `salloc -A [account] -J interactive -t 2:00:00 -p batch -N [nnodes] -n [total tasks]`

1) Generate MFC input files by running `./mfc.sh run [path to casefile] -N [nnodes] -n [total tasks] --gpu -t pre_process simulation --case-optimization`

2) Move to the simulation directory using `cd [path to casefile]`

3) `module load` the following modules, in exactly the order listed:
- rocm/5.5.1
- cray-python
- omniperf

4) Run `omniperf profile -n [profile name] -- [path to MFC beginning with /]/build/install/bin/simulation`

5) Run `omniperf analyze --gui -p [path to casefile]/workloads/[profile name]/mi200`

6) Determine what login node you're on, call it [node name]

7) Open a new terminal window and log into Frontier using `ssh -L8050:localhost:8050 username@[node name].frontier.olcf.ornl.gov`

8) Open a web browser and navigate to `http://localhost:8050/`
8 changes: 5 additions & 3 deletions src/common/include/inline_conversions.fpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#:def s_compute_speed_of_sound()
subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c)

#ifdef CRAY_ACC_WAR
!DIR$ INLINEALWAYS s_compute_speed_of_sound
#else
!$acc routine seq
#endif
real(kind(0d0)), intent(IN) :: pres
real(kind(0d0)), intent(IN) :: rho, gamma, pi_inf
real(kind(0d0)), intent(IN) :: H
Expand Down Expand Up @@ -39,7 +43,6 @@
(pres + pi_inf/(gamma + 1d0))/ &
(rho*(1d0 - adv(num_fluids)))
end if

else
c = ((H - 5d-1*vel_sum)/gamma)
end if
Expand All @@ -49,7 +52,6 @@
else
c = sqrt(c)
end if

end subroutine s_compute_speed_of_sound
#:enddef

Loading

0 comments on commit 2b3d35d

Please sign in to comment.