From e544a70b49cb2b77ce88321a646802499f008d6d Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Wed, 10 Jan 2024 16:10:22 -0500 Subject: [PATCH 1/4] setup --- setup.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 9b3833b..5f6fd55 100644 --- a/setup.py +++ b/setup.py @@ -4,13 +4,14 @@ import torch from torch.utils.cpp_extension import BuildExtension, CUDAExtension -if not torch.cuda.is_available(): - if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None: - os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0" +if os.environ.get("TORCH_CUDA_ARCH_LIST"): + # Let the PyTorch builder choose which device(s) to target. + device_capability = "" +else: + device_capability = torch.cuda.get_device_capability() + device_capability = f"{device_capability[0]}{device_capability[1]}" cwd = Path(os.path.dirname(os.path.abspath(__file__))) -_dc = torch.cuda.get_device_capability() -_dc = f"{_dc[0]}{_dc[1]}" ext_modules = [ CUDAExtension( @@ -25,8 +26,8 @@ "-fopenmp", "-fPIC", "-Wno-strict-aliasing" ], "nvcc": [ - f"--generate-code=arch=compute_{_dc},code=sm_{_dc}", - f"-DGROUPED_GEMM_DEVICE_CAPABILITY={_dc}", + f"--generate-code=arch=compute_{device_capability},code=sm_{device_capability}", + f"-DGROUPED_GEMM_DEVICE_CAPABILITY={device_capability}", # NOTE: CUTLASS requires c++17. 
"-std=c++17", ], From 37be1fb6cc3c8dcadf18cae500e841379881dacd Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Wed, 10 Jan 2024 16:56:32 -0500 Subject: [PATCH 2/4] fix --- setup.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 5f6fd55..af5f578 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,16 @@ cwd = Path(os.path.dirname(os.path.abspath(__file__))) +nvcc_flags = [ + "-std=c++17", # NOTE: CUTLASS requires c++17 +] + +if device_capability: + nvcc_flags.extend( + f"--generate-code=arch=compute_{device_capability},code=sm_{device_capability}", + f"-DGROUPED_GEMM_DEVICE_CAPABILITY={device_capability}", + ) + ext_modules = [ CUDAExtension( "grouped_gemm_backend", @@ -25,12 +35,7 @@ "cxx": [ "-fopenmp", "-fPIC", "-Wno-strict-aliasing" ], - "nvcc": [ - f"--generate-code=arch=compute_{device_capability},code=sm_{device_capability}", - f"-DGROUPED_GEMM_DEVICE_CAPABILITY={device_capability}", - # NOTE: CUTLASS requires c++17. - "-std=c++17", - ], + "nvcc": nvcc_flags, } ) ] From 5ef3699272ebfabbd8886adbefec094c5e5efb4b Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Thu, 11 Jan 2024 15:33:10 -0500 Subject: [PATCH 3/4] fix typo --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index af5f578..8fcdb76 100644 --- a/setup.py +++ b/setup.py @@ -18,10 +18,10 @@ ] if device_capability: - nvcc_flags.extend( + nvcc_flags.extend([ f"--generate-code=arch=compute_{device_capability},code=sm_{device_capability}", f"-DGROUPED_GEMM_DEVICE_CAPABILITY={device_capability}", - ) + ]) ext_modules = [ CUDAExtension( From 35034acc54da6b55815a568b112c13110d3a2e40 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Thu, 11 Jan 2024 15:34:28 -0500 Subject: [PATCH 4/4] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8fcdb76..d3299eb 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ setup( name="grouped_gemm", 
- version="0.0.1", + version="0.1.1", author="Trevor Gale", author_email="tgale@stanford.edu", description="Grouped GEMM",