Merge pull request #12 from ANL-CESAR/hip

Add HIP version
ANL-CESAR · Sep 20, 2021 · 9165a03 · 9165a03
2 parents 13a7981 + a2e8796
commit 9165a03
Show file tree

Hide file tree

Showing 28 changed files with 1,787 additions and 64 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -1,3 +1,16 @@
+=====================================================================
+NEW IN VERSION 13
+=====================================================================
+- (Feature) Added HIP port of RSBench. This port is based closely
+  off the CUDA version, and was generated using an automated code
+  conversion utility with only a few manual changes required.
+
+- Fixed threads per block for CUDA/HIP/OpenCL to all use 256 threads.
+  Other models will select this value themselves, but it may be
+  worth testing configurations manually with those models as well.
+
+- Added a warning about GPU timers to output.
+
 =====================================================================
 NEW IN VERSION 12
 =====================================================================

diff --git a/README.md b/README.md
@@ -41,6 +41,9 @@ This version of RSBench is written in OpenCL, and can be used for CPU, GPU, FPGA
 4. **RSBench/sycl**
 This version of RSBench is written in SYCL, and can be used for CPU, GPU, FPGA, or other architectures that support OpenCL and SYCL. It was written with GPUs in mind, so if running on other architectures you may need to heavily re-optimize the code. You will also likely need to edit the makefile to supply the path to your SYCL compiler.
 
+5. **RSBench/hip**
+This version of RSBench is written in HIP for use with GPU architectures. This version is derived from CUDA using an automatic conversion tool with only a few small manual changes.
+
 ## Compilation
 
 To compile RSBench with default settings, navigate to your selected source directory and use the following command:

diff --git a/cuda/Makefile b/cuda/Makefile
@@ -6,7 +6,7 @@ COMPILER    = nvidia
 OPTIMIZE    = yes
 DEBUG       = no
 PROFILE     = no
-SM_VERSION  = 37
+SM_VERSION  = 80
 
 #===============================================================================
 # Program name & source code list
@@ -31,10 +31,10 @@ obj = $(source:.cu=.o)
 # Standard Flags
 CFLAGS := 
 
-# Regular gcc Compiler
+# NVIDIA Compiler
 ifeq ($(COMPILER),nvidia)
   CC = nvcc
-  CFLAGS += -Xcompiler -Wall -Xcompiler -O3 -arch=sm_$(SM_VERSION) -std=c++11
+  CFLAGS += -Xcompiler -Wall -Xcompiler -O3 -arch=sm_$(SM_VERSION) -std=c++14
 endif
 
 # Linker Flags

diff --git a/cuda/io.cu b/cuda/io.cu
@@ -288,6 +288,7 @@ void print_input_summary(Input input)
 
 int validate_and_print_results(Input input, double runtime, unsigned long vhash)
 {
+  printf("NOTE: Timings are estimated -- use nvprof/nsys/iprof/rocprof for formal analysis\n");
 	printf("Runtime:               %.3lf seconds\n", runtime);
 	int lookups = 0;
 	if( input.simulation_method == HISTORY_BASED )

diff --git a/cuda/main.cu b/cuda/main.cu
@@ -6,7 +6,7 @@ int main(int argc, char * argv[])
 	// Initialization & Command Line Read-In
 	// =====================================================================
 
-	int version = 12;
+	int version = 13;
 	double start, stop;
 
 	// Process CLI Fields

diff --git a/cuda/simulation.cu b/cuda/simulation.cu
@@ -19,8 +19,8 @@ void run_event_based_simulation(Input input, SimulationData GSD, unsigned long *
 	////////////////////////////////////////////////////////////////////////////////
 	printf("Running baseline event-based simulation on device...\n");
 
-	int nthreads = 32;
-	int nblocks = ceil( (double) input.lookups / 32.0);
+	int nthreads = 256;
+	int nblocks = ceil( (double) input.lookups / (double) nthreads);
 
 	xs_lookup_kernel_baseline<<<nblocks, nthreads>>>( input, GSD );
 	gpuErrchk( cudaPeekAtLastError() );

diff --git a/hip/Makefile b/hip/Makefile
@@ -0,0 +1,76 @@
+#===============================================================================
+# User Options
+#===============================================================================
+
+COMPILER    = amd
+OPTIMIZE    = yes
+DEBUG       = no
+PROFILE     = no
+
+#===============================================================================
+# Program name & source code list
+#===============================================================================
+
+program = rsbench
+
+source = \
+main.hip \
+simulation.hip\
+io.hip \
+init.hip \
+material.hip \
+utils.hip
+
+obj = $(source:.hip=.o)
+
+#===============================================================================
+# Sets Flags
+#===============================================================================
+
+# Standard Flags
+CFLAGS := 
+
+# AMD
+ifeq ($(COMPILER),amd)
+  CC = hipcc
+  CFLAGS += -std=c++14
+endif
+
+# Linker Flags
+LDFLAGS = -lm
+
+# Debug Flags (-g only: -G is nvcc's device-debug flag, not a hipcc debug option)
+ifeq ($(DEBUG),yes)
+  CFLAGS += -g
+  LDFLAGS += -g
+endif
+
+# Profiling Flags
+ifeq ($(PROFILE),yes)
+  CFLAGS += -pg
+  LDFLAGS  += -pg
+endif
+
+# Optimization Flags
+ifeq ($(OPTIMIZE),yes)
+  CFLAGS += -O3
+endif
+
+#===============================================================================
+# Targets to Build
+#===============================================================================
+
+$(program): $(obj) rsbench.h Makefile
+	$(CC) $(CFLAGS) $(obj) -o $@ $(LDFLAGS)
+
+%.o: %.hip rsbench.h Makefile
+	$(CC) $(CFLAGS) -c $< -o $@
+
+clean:
+	rm -rf rsbench $(obj)
+
+edit:
+	vim -p $(source) rsbench.h
+
+run:
+	./rsbench