From c0a4b5dfb08ed6a4ce06ec74a070e4749968b68e Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Tue, 9 Jul 2024 16:48:03 -0500 Subject: [PATCH] mpl/gpu: change MPL_gpu_device_handle_t to int The abstraction of device id as an integer is a good abstraction above MPL. The opaqueness of MPL_gpu_device_handle_t, on the other hand, makes it useless, since the upper layer can't do anything with it. For ZE, simply expose the internal device id. For new GPU runtimes that do not support integer device ids, we can similarly do a map as in mpl_gpu_ze.c. --- src/mpl/include/mpl_gpu.h | 4 ++-- src/mpl/include/mpl_gpu_cuda.h | 1 - src/mpl/include/mpl_gpu_fallback.h | 1 - src/mpl/include/mpl_gpu_hip.h | 1 - src/mpl/include/mpl_gpu_ze.h | 5 ++--- src/mpl/src/gpu/mpl_gpu_cuda.c | 2 +- src/mpl/src/gpu/mpl_gpu_fallback.c | 2 +- src/mpl/src/gpu/mpl_gpu_hip.c | 2 +- src/mpl/src/gpu/mpl_gpu_ze.c | 30 ++++++++++++++++-------------- 9 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/mpl/include/mpl_gpu.h b/src/mpl/include/mpl_gpu.h index 7b84e9e25d9..16fd0049d0f 100644 --- a/src/mpl/include/mpl_gpu.h +++ b/src/mpl/include/mpl_gpu.h @@ -36,7 +36,7 @@ typedef enum { typedef struct { MPL_pointer_type_t type; - MPL_gpu_device_handle_t device; + int device; MPL_gpu_device_attr device_attr; } MPL_pointer_attr_t; @@ -125,7 +125,7 @@ int MPL_gpu_free_host(void *ptr); int MPL_gpu_register_host(const void *ptr, size_t size); int MPL_gpu_unregister_host(const void *ptr); -int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device); +int MPL_gpu_malloc(void **ptr, size_t size, int h_device); int MPL_gpu_free(void *ptr); int MPL_gpu_init(int debug_summary); diff --git a/src/mpl/include/mpl_gpu_cuda.h b/src/mpl/include/mpl_gpu_cuda.h index 6b6949bbb07..566b82ecb2e 100644 --- a/src/mpl/include/mpl_gpu_cuda.h +++ b/src/mpl/include/mpl_gpu_cuda.h @@ -10,7 +10,6 @@ #include "cuda_runtime_api.h" typedef cudaIpcMemHandle_t MPL_gpu_ipc_mem_handle_t; -typedef int 
MPL_gpu_device_handle_t; typedef struct cudaPointerAttributes MPL_gpu_device_attr; typedef int MPL_gpu_request; typedef cudaStream_t MPL_gpu_stream_t; diff --git a/src/mpl/include/mpl_gpu_fallback.h b/src/mpl/include/mpl_gpu_fallback.h index 626057fe7bc..12b89f2137a 100644 --- a/src/mpl/include/mpl_gpu_fallback.h +++ b/src/mpl/include/mpl_gpu_fallback.h @@ -7,7 +7,6 @@ #define MPL_GPU_CUDA_H_INCLUDED typedef int MPL_gpu_ipc_mem_handle_t; -typedef int MPL_gpu_device_handle_t; typedef int MPL_gpu_device_attr; /* dummy type */ typedef int MPL_gpu_request; typedef int MPL_gpu_stream_t; diff --git a/src/mpl/include/mpl_gpu_hip.h b/src/mpl/include/mpl_gpu_hip.h index 3b12778087e..1b0f1bd2a32 100644 --- a/src/mpl/include/mpl_gpu_hip.h +++ b/src/mpl/include/mpl_gpu_hip.h @@ -14,7 +14,6 @@ #include "hip/hip_runtime_api.h" typedef hipIpcMemHandle_t MPL_gpu_ipc_mem_handle_t; -typedef int MPL_gpu_device_handle_t; typedef struct hipPointerAttribute_t MPL_gpu_device_attr; typedef int MPL_gpu_request; typedef hipStream_t MPL_gpu_stream_t; diff --git a/src/mpl/include/mpl_gpu_ze.h b/src/mpl/include/mpl_gpu_ze.h index 31f3a5335a5..78c5093aebf 100644 --- a/src/mpl/include/mpl_gpu_ze.h +++ b/src/mpl/include/mpl_gpu_ze.h @@ -26,7 +26,6 @@ typedef struct _MPL_gpu_ipc_mem_handle_t { fd_pid_t data; } MPL_gpu_ipc_mem_handle_t; -typedef ze_device_handle_t MPL_gpu_device_handle_t; typedef ze_alloc_attr_t MPL_gpu_device_attr; typedef struct MPL_cmdlist_pool { @@ -52,7 +51,7 @@ typedef int MPL_gpu_stream_t; typedef volatile int MPL_gpu_event_t; #define MPL_GPU_STREAM_DEFAULT 0 -#define MPL_GPU_DEVICE_INVALID NULL +#define MPL_GPU_DEVICE_INVALID -1 #define MPL_GPU_DEV_AFFINITY_ENV "ZE_AFFINITY_MASK" @@ -67,7 +66,7 @@ int MPL_ze_ipc_handle_map(MPL_gpu_ipc_mem_handle_t * ipc_handle, int is_shared_h int MPL_ze_ipc_handle_mmap_host(MPL_gpu_ipc_mem_handle_t * ipc_handle, int shared_handle, int dev_id, size_t size, void **ptr); int MPL_ze_mmap_device_pointer(void *dptr, MPL_gpu_device_attr * attr, - 
MPL_gpu_device_handle_t device, void **mmaped_ptr); + int device, void **mmaped_ptr); int MPL_ze_mmap_handle_unmap(void *ptr, int dev_id); #endif /* ifndef MPL_GPU_ZE_H_INCLUDED */ diff --git a/src/mpl/src/gpu/mpl_gpu_cuda.c b/src/mpl/src/gpu/mpl_gpu_cuda.c index 43d49fc0d98..a43da1cd1b9 100644 --- a/src/mpl/src/gpu/mpl_gpu_cuda.c +++ b/src/mpl/src/gpu/mpl_gpu_cuda.c @@ -264,7 +264,7 @@ int MPL_gpu_unregister_host(const void *ptr) goto fn_exit; } -int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device) +int MPL_gpu_malloc(void **ptr, size_t size, int h_device) { int mpl_err = MPL_SUCCESS; int prev_devid; diff --git a/src/mpl/src/gpu/mpl_gpu_fallback.c b/src/mpl/src/gpu/mpl_gpu_fallback.c index f1ef9155a0c..2c4f0c8cdf7 100644 --- a/src/mpl/src/gpu/mpl_gpu_fallback.c +++ b/src/mpl/src/gpu/mpl_gpu_fallback.c @@ -78,7 +78,7 @@ int MPL_gpu_unregister_host(const void *ptr) return MPL_SUCCESS; } -int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device) +int MPL_gpu_malloc(void **ptr, size_t size, int h_device) { abort(); return MPL_ERR_GPU_INTERNAL; diff --git a/src/mpl/src/gpu/mpl_gpu_hip.c b/src/mpl/src/gpu/mpl_gpu_hip.c index e60517cbb85..869fc379972 100644 --- a/src/mpl/src/gpu/mpl_gpu_hip.c +++ b/src/mpl/src/gpu/mpl_gpu_hip.c @@ -291,7 +291,7 @@ int MPL_gpu_unregister_host(const void *ptr) goto fn_exit; } -int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device) +int MPL_gpu_malloc(void **ptr, size_t size, int h_device) { int mpl_err = MPL_SUCCESS; int prev_devid; diff --git a/src/mpl/src/gpu/mpl_gpu_ze.c b/src/mpl/src/gpu/mpl_gpu_ze.c index 30782c93b80..7ade5fb9156 100644 --- a/src/mpl/src/gpu/mpl_gpu_ze.c +++ b/src/mpl/src/gpu/mpl_gpu_ze.c @@ -692,7 +692,7 @@ static int get_physical_device(int dev_id) } /* Get dev_id from device handle */ -MPL_STATIC_INLINE_PREFIX int device_to_dev_id(MPL_gpu_device_handle_t device) +MPL_STATIC_INLINE_PREFIX int device_to_dev_id(ze_device_handle_t device) { int dev_id 
= -1; for (int d = 0; d < local_ze_device_count; d++) { @@ -706,7 +706,7 @@ MPL_STATIC_INLINE_PREFIX int device_to_dev_id(MPL_gpu_device_handle_t device) } /* Get device from dev_id */ -MPL_STATIC_INLINE_PREFIX int dev_id_to_device(int dev_id, MPL_gpu_device_handle_t * device) +MPL_STATIC_INLINE_PREFIX int dev_id_to_device(int dev_id, ze_device_handle_t * device) { int mpl_err = MPL_SUCCESS; @@ -1774,7 +1774,7 @@ int MPL_gpu_ipc_handle_destroy(const void *ptr, MPL_pointer_attr_t * gpu_attr) } if (likely(MPL_gpu_info.specialized_cache)) { - dev_id = device_to_dev_id(gpu_attr->device); + dev_id = gpu_attr->device; if (dev_id == -1) { goto fn_fail; } @@ -2053,7 +2053,7 @@ int MPL_gpu_query_pointer_attr(const void *ptr, MPL_pointer_attr_t * attr) ret = zeMemGetAllocProperties(ze_context, ptr, &attr->device_attr.prop, &attr->device_attr.device); ZE_ERR_CHECK(ret); - attr->device = attr->device_attr.device; + attr->device = device_to_dev_id(attr->device_attr.device); switch (attr->device_attr.prop.type) { case ZE_MEMORY_TYPE_UNKNOWN: attr->type = MPL_GPU_POINTER_UNREGISTERED_HOST; @@ -2127,7 +2127,7 @@ int MPL_gpu_query_is_same_dev(int global_dev1, int global_dev2) #endif } -int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device) +int MPL_gpu_malloc(void **ptr, size_t size, int h_device) { int mpl_err = MPL_SUCCESS; int ret; @@ -2138,10 +2138,16 @@ int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device) .flags = 0, .ordinal = 0, /* We currently support a single memory type */ }; + + ze_device_handle_t device_handle; + ret = dev_id_to_device(h_device, &device_handle); + if (ret) { + goto fn_fail; + } /* Currently ZE ignores this argument and uses an internal alignment * value. However, this behavior can change in the future. 
*/ mem_alignment = 1; - ret = zeMemAllocDevice(ze_context, &device_desc, size, mem_alignment, h_device, ptr); + ret = zeMemAllocDevice(ze_context, &device_desc, size, mem_alignment, device_handle, ptr); ZE_ERR_CHECK(ret); @@ -2238,11 +2244,7 @@ int MPL_gpu_unregister_host(const void *ptr) int MPL_gpu_get_dev_id_from_attr(MPL_pointer_attr_t * attr) { - int dev_id = -1; - - dev_id = device_to_dev_id(attr->device); - - return dev_id; + return attr->device; } int MPL_gpu_get_buffer_bounds(const void *ptr, void **pbase, uintptr_t * len) @@ -3044,7 +3046,7 @@ int MPL_ze_ipc_handle_map(MPL_gpu_ipc_mem_handle_t * mpl_ipc_handle, int is_shar ze_result_t ret; int status; uint32_t nfds; - MPL_gpu_device_handle_t dev_handle; + ze_device_handle_t dev_handle; fd_pid_t h; h = mpl_ipc_handle->data; @@ -3210,7 +3212,7 @@ int MPL_ze_ipc_handle_mmap_host(MPL_gpu_ipc_mem_handle_t * mpl_ipc_handle, int i /* this function takes a local device pointer and mmap to host */ int MPL_ze_mmap_device_pointer(void *dptr, MPL_gpu_device_attr * attr, - MPL_gpu_device_handle_t device, void **mmaped_ptr) + int device, void **mmaped_ptr) { ze_result_t ret; int mpl_err = MPL_SUCCESS; @@ -3228,7 +3230,7 @@ int MPL_ze_mmap_device_pointer(void *dptr, MPL_gpu_device_attr * attr, offset = (char *) dptr - (char *) pbase; mem_id = attr->prop.id; - local_dev_id = device_to_dev_id(device); + local_dev_id = device; if (local_dev_id == -1) { goto fn_fail; }