Skip to content

Commit

Permalink
Merge pull request #12 from ANL-CESAR/hip
Browse files Browse the repository at this point in the history
Add HIP version
  • Loading branch information
jtramm authored Sep 20, 2021
2 parents 13a7981 + a2e8796 commit 9165a03
Show file tree
Hide file tree
Showing 28 changed files with 1,787 additions and 64 deletions.
13 changes: 13 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
=====================================================================
NEW IN VERSION 13
=====================================================================
- (Feature) Added HIP port of RSBench. This port is based closely
off the CUDA version, and was generated using an automated code
conversion utility with only a few manual changes required.

- Fixed threads per block for CUDA/HIP/OpenCL to all use 256 threads.
Other models will select this value themselves, but it may be
worth testing configurations manually with those models as well.

- Added a warning about GPU timers to output.

=====================================================================
NEW IN VERSION 12
=====================================================================
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ This version of RSBench is written in OpenCL, and can be used for CPU, GPU, FPGA
4. **RSBench/sycl**
This version of RSBench is written in SYCL, and can be used for CPU, GPU, FPGA, or other architectures that support OpenCL and SYCL. It was written with GPUs in mind, so if running on other architectures you may need to heavily re-optimize the code. You will also likely need to edit the makefile to supply the path to your SYCL compiler.

5. **RSBench/hip**
This version of RSBench is written in HIP for use with GPU architectures. This version is derived from CUDA using an automatic conversion tool with only a few small manual changes.

## Compilation

To compile RSBench with default settings, navigate to your selected source directory and use the following command:
Expand Down
6 changes: 3 additions & 3 deletions cuda/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ COMPILER = nvidia
OPTIMIZE = yes
DEBUG = no
PROFILE = no
SM_VERSION = 37
SM_VERSION = 80

#===============================================================================
# Program name & source code list
Expand All @@ -31,10 +31,10 @@ obj = $(source:.cu=.o)
# Standard Flags
CFLAGS :=

# Regular gcc Compiler
# NVIDIA Compiler
ifeq ($(COMPILER),nvidia)
CC = nvcc
CFLAGS += -Xcompiler -Wall -Xcompiler -O3 -arch=sm_$(SM_VERSION) -std=c++11
CFLAGS += -Xcompiler -Wall -Xcompiler -O3 -arch=sm_$(SM_VERSION) -std=c++14
endif

# Linker Flags
Expand Down
1 change: 1 addition & 0 deletions cuda/io.cu
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ void print_input_summary(Input input)

int validate_and_print_results(Input input, double runtime, unsigned long vhash)
{
printf("NOTE: Timings are estimated -- use nvprof/nsys/iprof/rocprof for formal analysis\n");
printf("Runtime: %.3lf seconds\n", runtime);
int lookups = 0;
if( input.simulation_method == HISTORY_BASED )
Expand Down
2 changes: 1 addition & 1 deletion cuda/main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ int main(int argc, char * argv[])
// Initialization & Command Line Read-In
// =====================================================================

int version = 12;
int version = 13;
double start, stop;

// Process CLI Fields
Expand Down
4 changes: 2 additions & 2 deletions cuda/simulation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ void run_event_based_simulation(Input input, SimulationData GSD, unsigned long *
////////////////////////////////////////////////////////////////////////////////
printf("Running baseline event-based simulation on device...\n");

int nthreads = 32;
int nblocks = ceil( (double) input.lookups / 32.0);
int nthreads = 256;
int nblocks = ceil( (double) input.lookups / (double) nthreads);

xs_lookup_kernel_baseline<<<nblocks, nthreads>>>( input, GSD );
gpuErrchk( cudaPeekAtLastError() );
Expand Down
76 changes: 76 additions & 0 deletions hip/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#===============================================================================
# User Options
#===============================================================================

# COMPILER : toolchain selector; only "amd" (hipcc) is configured below.
# OPTIMIZE : "yes" appends -O3 to the compile flags.
# DEBUG    : "yes" adds debug symbols (-g) to the compile and link flags.
# PROFILE  : "yes" adds -pg to the compile and link flags.
COMPILER    = amd
OPTIMIZE    = yes
DEBUG       = no
PROFILE     = no

#===============================================================================
# Program name & source code list
#===============================================================================

program = rsbench

source = \
main.hip \
simulation.hip\
io.hip \
init.hip \
material.hip \
utils.hip

# One object file per .hip translation unit.
obj = $(source:.hip=.o)

#===============================================================================
# Sets Flags
#===============================================================================

# Standard Flags
CFLAGS :=

# AMD
ifeq ($(COMPILER),amd)
  CC = hipcc
  CFLAGS += -std=c++14
endif

# Linker Flags
LDFLAGS = -lm

# Debug Flags
# NOTE: only -g is used here. The -G switch is nvcc's device-debug flag and
# must not be passed to the clang-based hipcc, where -G has an unrelated
# meaning and expects an argument.
ifeq ($(DEBUG),yes)
  CFLAGS += -g
  LDFLAGS += -g
endif

# Profiling Flags
ifeq ($(PROFILE),yes)
  CFLAGS += -pg
  LDFLAGS += -pg
endif

# Optimization Flags
ifeq ($(OPTIMIZE),yes)
  CFLAGS += -O3
endif

#===============================================================================
# Targets to Build
#===============================================================================

# These targets do not produce files of the same name.
.PHONY: clean edit run

# Link step. rsbench.h and Makefile are listed as prerequisites so that any
# change to them triggers a relink; only the objects are passed to the linker.
$(program): $(obj) rsbench.h Makefile
	$(CC) $(CFLAGS) $(obj) -o $@ $(LDFLAGS)

# Compile each .hip source; rebuild when the shared header or build flags change.
%.o: %.hip rsbench.h Makefile
	$(CC) $(CFLAGS) -c $< -o $@

# Remove the executable and all object files.
clean:
	rm -rf rsbench $(obj)

# Open all sources and the shared header in vim tabs.
edit:
	vim -p $(source) rsbench.h

# Run the benchmark with its default settings.
run:
	./rsbench
Loading

0 comments on commit 9165a03

Please sign in to comment.