From fdc67f7fdadc19899f1c3068bc1e2ae1985d59d1 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Fri, 17 Jan 2025 11:44:42 -0800 Subject: [PATCH 01/10] device porperties first review --- cuda_core/cuda/core/experimental/_device.py | 304 +++++++++++++++++++- cuda_core/tests/test_device.py | 97 +++++++ 2 files changed, 397 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index 747174b2..d5875bba 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -14,6 +14,299 @@ _tls_lock = threading.Lock() +class DeviceProperties: + """ + Represents the properties of a CUDA device. + + Attributes + ---------- + name : str + ASCII string identifying the device. + uuid : cudaUUID_t + 16-byte unique identifier. + total_global_mem : int + Total amount of global memory available on the device in bytes. + shared_mem_per_block : int + Maximum amount of shared memory available to a thread block in bytes. + regs_per_block : int + Maximum number of 32-bit registers available to a thread block. + warp_size : int + Warp size in threads. + mem_pitch : int + Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through + cudaMallocPitch(). + max_threads_per_block : int + Maximum number of threads per block. + max_threads_dim : tuple + Maximum size of each dimension of a block. + max_grid_size : tuple + Maximum size of each dimension of a grid. + clock_rate : int + Clock frequency in kilohertz. + total_const_mem : int + Total amount of constant memory available on the device in bytes. + major : int + Major revision number defining the device's compute capability. + minor : int + Minor revision number defining the device's compute capability. + texture_alignment : int + Alignment requirement; texture base addresses that are aligned to textureAlignment bytes do not need an + offset applied to texture fetches. + texture_pitch_alignment : int + Pitch alignment requirement for 2D texture references that are bound to pitched memory. + device_overlap : int + 1 if the device can concurrently copy memory between host and device while executing a kernel, or 0 if not. + multi_processor_count : int + Number of multiprocessors on the device. + kernel_exec_timeout_enabled : int + 1 if there is a run time limit for kernels executed on the device, or 0 if not. + integrated : int + 1 if the device is an integrated (motherboard) GPU and 0 if it is a discrete (card) component. + can_map_host_memory : int + 1 if the device can map host memory into the CUDA address space for use with + cudaHostAlloc()/cudaHostGetDevicePointer(), or 0 if not. + compute_mode : int + Compute mode that the device is currently in. + max_texture_1d : int + Maximum 1D texture size. + max_texture_1d_mipmap : int + Maximum 1D mipmapped texture size. + max_texture_1d_linear : int + Maximum 1D texture size for textures bound to linear memory. + max_texture_2d : tuple + Maximum 2D texture dimensions. + max_texture_2d_mipmap : tuple + Maximum 2D mipmapped texture dimensions. + max_texture_2d_linear : tuple + Maximum 2D texture dimensions for 2D textures bound to pitch linear memory. + max_texture_2d_gather : tuple + Maximum 2D texture dimensions if texture gather operations have to be performed. + max_texture_3d : tuple + Maximum 3D texture dimensions. + max_texture_3d_alt : tuple + Maximum alternate 3D texture dimensions. + max_texture_cubemap : int + Maximum cubemap texture width or height. + max_texture_1d_layered : tuple + Maximum 1D layered texture dimensions. + max_texture_2d_layered : tuple + Maximum 2D layered texture dimensions. + max_texture_cubemap_layered : tuple + Maximum cubemap layered texture dimensions. + max_surface_1d : int + Maximum 1D surface size. + max_surface_2d : tuple + Maximum 2D surface dimensions. + max_surface_3d : tuple + Maximum 3D surface dimensions. + max_surface_1d_layered : tuple + Maximum 1D layered surface dimensions. + max_surface_2d_layered : tuple + Maximum 2D layered surface dimensions. + max_surface_cubemap : int + Maximum cubemap surface width or height. + max_surface_cubemap_layered : tuple + Maximum cubemap layered surface dimensions. + surface_alignment : int + Alignment requirements for surfaces. + concurrent_kernels : int + 1 if the device supports executing multiple kernels within the same context simultaneously, or 0 if + not. + ecc_enabled : int + 1 if the device has ECC support turned on, or 0 if not. + pci_bus_id : int + PCI bus identifier of the device. + pci_device_id : int + PCI device (sometimes called slot) identifier of the device. + pci_domain_id : int + PCI domain identifier of the device. + tcc_driver : int + 1 if the device is using a TCC driver or 0 if not. + async_engine_count : int + 1 when the device can concurrently copy memory between host and device while executing a kernel. + It is 2 when the device can concurrently copy memory between host and device in both directions + and execute a kernel at the same time. It is 0 if neither of these is supported. + unified_addressing : int + 1 if the device shares a unified address space with the host and 0 otherwise. + memory_clock_rate : int + Peak memory clock frequency in kilohertz. + memory_bus_width : int + Memory bus width in bits. + l2_cache_size : int + L2 cache size in bytes. + persisting_l2_cache_max_size : int + L2 cache's maximum persisting lines size in bytes. + max_threads_per_multi_processor : int + Number of maximum resident threads per multiprocessor. + stream_priorities_supported : int + 1 if the device supports stream priorities, or 0 if it is not supported. + global_l1_cache_supported : int + 1 if the device supports caching of globals in L1 cache, or 0 if it is not supported. + local_l1_cache_supported : int + 1 if the device supports caching of locals in L1 cache, or 0 if it is not supported. + shared_mem_per_multiprocessor : int + Maximum amount of shared memory available to a multiprocessor in bytes; this amount is shared by all + thread blocks simultaneously resident on a multiprocessor. + regs_per_multiprocessor : int + Maximum number of 32-bit registers available to a multiprocessor; this number is shared by all thread + blocks simultaneously resident on a multiprocessor. + managed_memory : int + 1 if the device supports allocating managed memory on this system, or 0 if it is not supported. + is_multi_gpu_board : int + 1 if the device is on a multi-GPU board (e.g. Gemini cards), and 0 if not. + multi_gpu_board_group_id : int + Unique identifier for a group of devices associated with the same board. Devices on the same + multi-GPU board will share the same identifier. + single_to_double_precision_perf_ratio : int + Ratio of single precision performance (in floating-point operations per second) to double precision + performance. + pageable_memory_access : int + 1 if the device supports coherently accessing pageable memory without calling cudaHostRegister on it, + and 0 otherwise. + concurrent_managed_access : int + 1 if the device can coherently access managed memory concurrently with the CPU, and 0 otherwise. + compute_preemption_supported : int + 1 if the device supports Compute Preemption, and 0 otherwise. + can_use_host_pointer_for_registered_mem : int + 1 if the device can access host registered memory at the same virtual address as the CPU, and 0 otherwise. + cooperative_launch : int + 1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernel, and 0 otherwise. + cooperative_multi_device_launch : int + 1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0 + otherwise. + pageable_memory_access_uses_host_page_tables : int + 1 if the device accesses pageable memory via the host's page tables, and 0 otherwise. + direct_managed_mem_access_from_host : int + 1 if the host can directly access managed memory on the device without migration, and 0 otherwise. + access_policy_max_window_size : int + Maximum value of cudaAccessPolicyWindow::num_bytes. + reserved_shared_mem_per_block : int + Shared memory reserved by CUDA driver per block in bytes. + host_register_supported : int + 1 if the device supports host memory registration via cudaHostRegister, and 0 otherwise. + sparse_cuda_array_supported : int + 1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise. + host_register_read_only_supported : int + 1 if the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly to register memory + that must be mapped as read-only to the GPU. + timeline_semaphore_interop_supported : int + 1 if external timeline semaphore interop is supported on the device, 0 otherwise. + memory_pools_supported : int + 1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise. + gpu_direct_rdma_supported : int + 1 if the device supports GPUDirect RDMA APIs, 0 otherwise. + gpu_direct_rdma_flush_writes_options : int + Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum. + gpu_direct_rdma_writes_ordering : int + See the cudaGPUDirectRDMAWritesOrdering enum for numerical values. + memory_pool_supported_handle_types : int + Bitmask of handle types supported with mempool-based IPC. + deferred_mapping_cuda_array_supported : int + 1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays. + ipc_event_supported : int + 1 if the device supports IPC Events, and 0 otherwise. + unified_function_pointers : int + 1 if the device supports unified pointers, and 0 otherwise. + """ + + def _init(device_id): + self = DeviceProperties.__new__(DeviceProperties) + + prop = handle_return(runtime.cudaGetDeviceProperties(device_id)) + + self.name = prop.name.decode("utf-8") + self.uuid = prop.uuid.bytes + self.total_global_mem = prop.totalGlobalMem + self.shared_mem_per_block = prop.sharedMemPerBlock + self.regs_per_block = prop.regsPerBlock + self.warp_size = prop.warpSize + self.mem_pitch = prop.memPitch + self.max_threads_per_block = prop.maxThreadsPerBlock + self.max_threads_dim = tuple(prop.maxThreadsDim) + self.max_grid_size = tuple(prop.maxGridSize) + self.clock_rate = prop.clockRate + self.total_const_mem = prop.totalConstMem + self.major = prop.major + self.minor = prop.minor + self.texture_alignment = prop.textureAlignment + self.texture_pitch_alignment = prop.texturePitchAlignment + self.device_overlap = prop.deviceOverlap + self.multi_processor_count = prop.multiProcessorCount + self.kernel_exec_timeout_enabled = prop.kernelExecTimeoutEnabled + self.integrated = prop.integrated + self.can_map_host_memory = prop.canMapHostMemory + self.compute_mode = prop.computeMode + self.max_texture_1d = prop.maxTexture1D + self.max_texture_1d_mipmap = prop.maxTexture1DMipmap + self.max_texture_1d_linear = prop.maxTexture1DLinear + self.max_texture_2d = tuple(prop.maxTexture2D) + self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap) + self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear) + self.max_texture_2d_gather = tuple(prop.maxTexture2DGather) + self.max_texture_3d = tuple(prop.maxTexture3D) + self.max_texture_3d_alt = tuple(prop.maxTexture3DAlt) + self.max_texture_cubemap = prop.maxTextureCubemap + self.max_texture_1d_layered = tuple(prop.maxTexture1DLayered) + self.max_texture_2d_layered = tuple(prop.maxTexture2DLayered) + self.max_texture_cubemap_layered = tuple(prop.maxTextureCubemapLayered) + self.max_surface_1d = prop.maxSurface1D + self.max_surface_2d = tuple(prop.maxSurface2D) + self.max_surface_3d = tuple(prop.maxSurface3D) + self.max_surface_1d_layered = tuple(prop.maxSurface1DLayered) + self.max_surface_2d_layered = tuple(prop.maxSurface2DLayered) + self.max_surface_cubemap = prop.maxSurfaceCubemap + self.max_surface_cubemap_layered = tuple(prop.maxSurfaceCubemapLayered) + self.surface_alignment = prop.surfaceAlignment + self.concurrent_kernels = prop.concurrentKernels + self.ecc_enabled = prop.ECCEnabled + self.pci_bus_id = prop.pciBusID + self.pci_device_id = prop.pciDeviceID + self.pci_domain_id = prop.pciDomainID + self.tcc_driver = prop.tccDriver + self.async_engine_count = prop.asyncEngineCount + self.unified_addressing = prop.unifiedAddressing + self.memory_clock_rate = prop.memoryClockRate + self.memory_bus_width = prop.memoryBusWidth + self.l2_cache_size = prop.l2CacheSize + self.persisting_l2_cache_max_size = prop.persistingL2CacheMaxSize + self.max_threads_per_multi_processor = prop.maxThreadsPerMultiProcessor + self.stream_priorities_supported = prop.streamPrioritiesSupported + self.global_l1_cache_supported = prop.globalL1CacheSupported + self.local_l1_cache_supported = prop.localL1CacheSupported + self.shared_mem_per_multiprocessor = prop.sharedMemPerMultiprocessor + self.regs_per_multiprocessor = prop.regsPerMultiprocessor + self.managed_memory = prop.managedMemory + self.is_multi_gpu_board = prop.isMultiGpuBoard + self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID + self.single_to_double_precision_perf_ratio = prop.singleToDoublePrecisionPerfRatio + self.pageable_memory_access = prop.pageableMemoryAccess + self.concurrent_managed_access = prop.concurrentManagedAccess + self.compute_preemption_supported = prop.computePreemptionSupported + self.can_use_host_pointer_for_registered_mem = prop.canUseHostPointerForRegisteredMem + self.cooperative_launch = prop.cooperativeLaunch + self.cooperative_multi_device_launch = prop.cooperativeMultiDeviceLaunch + self.pageable_memory_access_uses_host_page_tables = prop.pageableMemoryAccessUsesHostPageTables + self.direct_managed_mem_access_from_host = prop.directManagedMemAccessFromHost + self.access_policy_max_window_size = prop.accessPolicyMaxWindowSize + self.reserved_shared_mem_per_block = prop.reservedSharedMemPerBlock + self.host_register_supported = prop.hostRegisterSupported + self.sparse_cuda_array_supported = prop.sparseCudaArraySupported + self.host_register_read_only_supported = prop.hostRegisterReadOnlySupported + self.timeline_semaphore_interop_supported = prop.timelineSemaphoreInteropSupported + self.memory_pools_supported = prop.memoryPoolsSupported + self.gpu_direct_rdma_supported = prop.gpuDirectRDMASupported + self.gpu_direct_rdma_flush_writes_options = prop.gpuDirectRDMAFlushWritesOptions + self.gpu_direct_rdma_writes_ordering = prop.gpuDirectRDMAWritesOrdering + self.memory_pool_supported_handle_types = prop.memoryPoolSupportedHandleTypes + self.deferred_mapping_cuda_array_supported = prop.deferredMappingCudaArraySupported + self.ipc_event_supported = prop.ipcEventSupported + self.unified_function_pointers = prop.unifiedFunctionPointers + return self + + def __init__(self, device_id): + raise RuntimeError("DeviceProperties should not be instantiated directly") + + class Device: """Represent a GPU and act as an entry point for cuda.core features. @@ -41,7 +334,7 @@ class Device: """ - __slots__ = ("_id", "_mr", "_has_inited") + __slots__ = ("_id", "_mr", "_has_inited", "_properties") def __new__(cls, device_id=None): # important: creating a Device instance does not initialize the GPU! @@ -73,6 +366,7 @@ def __new__(cls, device_id=None): dev._mr = _SynchronousMemoryResource(dev_id) dev._has_inited = False + dev._properties = None _tls.devices.append(dev) return _tls.devices[device_id] @@ -124,10 +418,12 @@ def name(self) -> str: return name.decode() @property - def properties(self) -> dict: + def properties(self) -> DeviceProperties: """Return information about the compute-device.""" - # TODO: pythonize the key names - return handle_return(runtime.cudaGetDeviceProperties(self._id)) + if self._properties is None: + self._properties = DeviceProperties._init(self._id) + + return self._properties @property def compute_capability(self) -> ComputeCapability: diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 876299f3..571b7dd8 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -78,3 +78,100 @@ def test_compute_capability(): ) expected_cc = ComputeCapability(major, minor) assert device.compute_capability == expected_cc + + +def test_device_property_values(): + device = Device() + assert device.properties.name == device.name + assert device.properties.uuid.hex() == device.uuid.replace("-", "") + + +def test_device_property_types(): + device = Device() + assert isinstance(device.properties.name, str) + assert isinstance(device.properties.uuid, bytes) + assert isinstance(device.properties.total_global_mem, int) + assert isinstance(device.properties.shared_mem_per_block, int) + assert isinstance(device.properties.regs_per_block, int) + assert isinstance(device.properties.warp_size, int) + assert isinstance(device.properties.mem_pitch, int) + assert isinstance(device.properties.max_threads_per_block, int) + assert isinstance(device.properties.max_threads_dim, tuple) + assert isinstance(device.properties.max_grid_size, tuple) + assert isinstance(device.properties.clock_rate, int) + assert isinstance(device.properties.total_const_mem, int) + assert isinstance(device.properties.major, int) + assert isinstance(device.properties.minor, int) + assert isinstance(device.properties.texture_alignment, int) + assert isinstance(device.properties.texture_pitch_alignment, int) + assert isinstance(device.properties.device_overlap, int) + assert isinstance(device.properties.multi_processor_count, int) + assert isinstance(device.properties.kernel_exec_timeout_enabled, int) + assert isinstance(device.properties.integrated, int) + assert isinstance(device.properties.can_map_host_memory, int) + assert isinstance(device.properties.compute_mode, int) + assert isinstance(device.properties.max_texture_1d, int) + assert isinstance(device.properties.max_texture_1d_mipmap, int) + assert isinstance(device.properties.max_texture_1d_linear, int) + assert isinstance(device.properties.max_texture_2d, tuple) + assert isinstance(device.properties.max_texture_2d_mipmap, tuple) + assert isinstance(device.properties.max_texture_2d_linear, tuple) + assert isinstance(device.properties.max_texture_2d_gather, tuple) + assert isinstance(device.properties.max_texture_3d, tuple) + assert isinstance(device.properties.max_texture_3d_alt, tuple) + assert isinstance(device.properties.max_texture_cubemap, int) + assert isinstance(device.properties.max_texture_1d_layered, tuple) + assert isinstance(device.properties.max_texture_2d_layered, tuple) + assert isinstance(device.properties.max_texture_cubemap_layered, tuple) + assert isinstance(device.properties.max_surface_1d, int) + assert isinstance(device.properties.max_surface_2d, tuple) + assert isinstance(device.properties.max_surface_3d, tuple) + assert isinstance(device.properties.max_surface_1d_layered, tuple) + assert isinstance(device.properties.max_surface_2d_layered, tuple) + assert isinstance(device.properties.max_surface_cubemap, int) + assert isinstance(device.properties.max_surface_cubemap_layered, tuple) + assert isinstance(device.properties.surface_alignment, int) + assert isinstance(device.properties.concurrent_kernels, int) + assert isinstance(device.properties.ecc_enabled, int) + assert isinstance(device.properties.pci_bus_id, int) + assert isinstance(device.properties.pci_device_id, int) + assert isinstance(device.properties.pci_domain_id, int) + assert isinstance(device.properties.tcc_driver, int) + assert isinstance(device.properties.async_engine_count, int) + assert isinstance(device.properties.unified_addressing, int) + assert isinstance(device.properties.memory_clock_rate, int) + assert isinstance(device.properties.memory_bus_width, int) + assert isinstance(device.properties.l2_cache_size, int) + assert isinstance(device.properties.persisting_l2_cache_max_size, int) + assert isinstance(device.properties.max_threads_per_multi_processor, int) + assert isinstance(device.properties.stream_priorities_supported, int) + assert isinstance(device.properties.global_l1_cache_supported, int) + assert isinstance(device.properties.local_l1_cache_supported, int) + assert isinstance(device.properties.shared_mem_per_multiprocessor, int) + assert isinstance(device.properties.regs_per_multiprocessor, int) + assert isinstance(device.properties.managed_memory, int) + assert isinstance(device.properties.is_multi_gpu_board, int) + assert isinstance(device.properties.multi_gpu_board_group_id, int) + assert isinstance(device.properties.single_to_double_precision_perf_ratio, int) + assert isinstance(device.properties.pageable_memory_access, int) + assert isinstance(device.properties.concurrent_managed_access, int) + assert isinstance(device.properties.compute_preemption_supported, int) + assert isinstance(device.properties.can_use_host_pointer_for_registered_mem, int) + assert isinstance(device.properties.cooperative_launch, int) + assert isinstance(device.properties.cooperative_multi_device_launch, int) + assert isinstance(device.properties.pageable_memory_access_uses_host_page_tables, int) + assert isinstance(device.properties.direct_managed_mem_access_from_host, int) + assert isinstance(device.properties.access_policy_max_window_size, int) + assert isinstance(device.properties.reserved_shared_mem_per_block, int) + assert isinstance(device.properties.host_register_supported, int) + assert isinstance(device.properties.sparse_cuda_array_supported, int) + assert isinstance(device.properties.host_register_read_only_supported, int) + assert isinstance(device.properties.timeline_semaphore_interop_supported, int) + assert isinstance(device.properties.memory_pools_supported, int) + assert isinstance(device.properties.gpu_direct_rdma_supported, int) + assert isinstance(device.properties.gpu_direct_rdma_flush_writes_options, int) + assert isinstance(device.properties.gpu_direct_rdma_writes_ordering, int) + assert isinstance(device.properties.memory_pool_supported_handle_types, int) + assert isinstance(device.properties.deferred_mapping_cuda_array_supported, int) + assert isinstance(device.properties.ipc_event_supported, int) + assert isinstance(device.properties.unified_function_pointers, int) From c3276f6a3c0367c727c990e3965db5aa360a3f77 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Fri, 17 Jan 2025 11:55:10 -0800 Subject: [PATCH 02/10] tweaks --- cuda_core/docs/source/api_private.rst | 1 + cuda_core/docs/source/release/0.2.0-notes.rst | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index f100eb7c..45d32808 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -16,6 +16,7 @@ CUDA runtime _memory.Buffer _stream.Stream _event.Event + _device.DeviceProperties CUDA compilation toolchain diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst index 572567fd..e686b504 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.rst +++ b/cuda_core/docs/source/release/0.2.0-notes.rst @@ -1,7 +1,14 @@ ``cuda.core`` v0.2.0 Release notes ================================== +Highlights +---------- + +- :class:`DeviceProperties` class added to provide pythonic access to device properties + + Breaking changes ---------------- -- Change ``__cuda_stream__`` from attribute to method \ No newline at end of file +- Change ``__cuda_stream__`` from attribute to method +- Device.properties now provides an instance of :class:`DeviceProperties` instead of a dictionary. \ No newline at end of file From 8ce2d0351fae8a91d433910f3019a3a87f89323a Mon Sep 17 00:00:00 2001 From: ksimpson Date: Fri, 17 Jan 2025 15:25:54 -0800 Subject: [PATCH 03/10] support the full list of properties --- cuda_core/cuda/core/experimental/_device.py | 18 ++ cuda_core/tests/test_device.py | 197 +++++++++++--------- 2 files changed, 127 insertions(+), 88 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index d5875bba..82e5473c 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -43,6 +43,8 @@ class DeviceProperties: Maximum size of each dimension of a grid. clock_rate : int Clock frequency in kilohertz. + cluster_launch : int + Indicates device supports cluster launch total_const_mem : int Total amount of constant memory available on the device in bytes. major : int @@ -174,6 +176,8 @@ class DeviceProperties: cooperative_multi_device_launch : int 1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0 otherwise. + sharedMemPerBlockOptin : int + The per device maximum shared memory per block usable by special opt in pageable_memory_access_uses_host_page_tables : int 1 if the device accesses pageable memory via the host's page tables, and 0 otherwise. direct_managed_mem_access_from_host : int @@ -207,6 +211,14 @@ class DeviceProperties: 1 if the device supports IPC Events, and 0 otherwise. unified_function_pointers : int 1 if the device supports unified pointers, and 0 otherwise. + host_native_atomic_supported : int + 1 if the link between the device and the host supports native atomic operations. + luid : bytes + 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms. + luid_device_node_mask : int + LUID device node mask. Value is undefined on TCC and non-Windows platforms. + max_blocks_per_multi_processor : int + Maximum number of resident blocks per multiprocessor. """ def _init(device_id): @@ -225,6 +237,7 @@ def _init(device_id): self.max_threads_dim = tuple(prop.maxThreadsDim) self.max_grid_size = tuple(prop.maxGridSize) self.clock_rate = prop.clockRate + self.cluster_launch = prop.clusterLaunch self.total_const_mem = prop.totalConstMem self.major = prop.major self.minor = prop.minor @@ -285,6 +298,7 @@ def _init(device_id): self.can_use_host_pointer_for_registered_mem = prop.canUseHostPointerForRegisteredMem self.cooperative_launch = prop.cooperativeLaunch self.cooperative_multi_device_launch = prop.cooperativeMultiDeviceLaunch + self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin self.pageable_memory_access_uses_host_page_tables = prop.pageableMemoryAccessUsesHostPageTables self.direct_managed_mem_access_from_host = prop.directManagedMemAccessFromHost self.access_policy_max_window_size = prop.accessPolicyMaxWindowSize @@ -301,6 +315,10 @@ def _init(device_id): self.deferred_mapping_cuda_array_supported = prop.deferredMappingCudaArraySupported self.ipc_event_supported = prop.ipcEventSupported self.unified_function_pointers = prop.unifiedFunctionPointers + self.host_native_atomic_supported = prop.hostNativeAtomicSupported + self.luid = prop.luid + self.luid_device_node_mask = prop.luidDeviceNodeMask + self.max_blocks_per_multi_processor = prop.maxBlocksPerMultiProcessor return self def __init__(self, device_id): diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 571b7dd8..2b0ffe39 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -11,6 +11,7 @@ except ImportError: from cuda import cuda as driver from cuda import cudart as runtime +import pytest from cuda.core.experimental import Device from cuda.core.experimental._utils import ComputeCapability, handle_return @@ -86,92 +87,112 @@ def test_device_property_values(): assert device.properties.uuid.hex() == device.uuid.replace("-", "") -def test_device_property_types(): +cuda_base_properties = [ + ("name", str), + ("uuid", bytes), + ("total_global_mem", int), + ("shared_mem_per_block", int), + ("regs_per_block", int), + ("warp_size", int), + ("mem_pitch", int), + ("max_threads_per_block", int), + ("max_threads_dim", tuple), + ("max_grid_size", tuple), + ("clock_rate", int), + ("total_const_mem", int), + ("major", int), + ("minor", int), + ("texture_alignment", int), + ("texture_pitch_alignment", int), + ("device_overlap", int), + ("multi_processor_count", int), + ("kernel_exec_timeout_enabled", int), + ("integrated", int), + ("can_map_host_memory", int), + ("compute_mode", int), + ("max_texture_1d", int), + ("max_texture_1d_mipmap", int), + ("max_texture_1d_linear", int), + ("max_texture_2d", tuple), + ("max_texture_2d_mipmap", tuple), + ("max_texture_2d_linear", tuple), + ("max_texture_2d_gather", tuple), + ("max_texture_3d", tuple), + ("max_texture_3d_alt", tuple), + ("max_texture_cubemap", int), + ("max_texture_1d_layered", tuple), + ("max_texture_2d_layered", tuple), + ("max_texture_cubemap_layered", tuple), + ("max_surface_1d", int), + ("max_surface_2d", tuple), + ("max_surface_3d", tuple), + ("max_surface_1d_layered", tuple), + ("max_surface_2d_layered", tuple), + ("max_surface_cubemap", int), + ("max_surface_cubemap_layered", tuple), + ("surface_alignment", int), + ("concurrent_kernels", int), + ("ecc_enabled", int), + ("pci_bus_id", int), + ("pci_device_id", int), + ("pci_domain_id", int), + ("tcc_driver", int), + ("async_engine_count", int), + ("unified_addressing", int), + ("memory_clock_rate", int), + ("memory_bus_width", int), + ("l2_cache_size", int), + ("persisting_l2_cache_max_size", int), + ("max_threads_per_multi_processor", int), + ("stream_priorities_supported", int), + ("global_l1_cache_supported", int), + ("local_l1_cache_supported", int), + ("shared_mem_per_multiprocessor", int), + ("regs_per_multiprocessor", int), + ("managed_memory", int), + ("is_multi_gpu_board", int), + ("multi_gpu_board_group_id", int), + ("single_to_double_precision_perf_ratio", int), + ("pageable_memory_access", int), + ("concurrent_managed_access", int), + ("compute_preemption_supported", int), + ("can_use_host_pointer_for_registered_mem", int), + ("cooperative_launch", int), + ("cooperative_multi_device_launch", int), + ("pageable_memory_access_uses_host_page_tables", int), + ("direct_managed_mem_access_from_host", int), + ("access_policy_max_window_size", int), + ("reserved_shared_mem_per_block", int), + ("host_register_supported", int), + ("sparse_cuda_array_supported", int), + ("host_register_read_only_supported", int), + ("timeline_semaphore_interop_supported", int), + ("memory_pools_supported", int), + ("gpu_direct_rdma_supported", int), + ("gpu_direct_rdma_flush_writes_options", int), + ("gpu_direct_rdma_writes_ordering", int), + ("memory_pool_supported_handle_types", int), + ("deferred_mapping_cuda_array_supported", int), + ("ipc_event_supported", int), + ("unified_function_pointers", int), +] + +cuda_12_properties = [ + ("host_native_atomic_supported", int), + ("luid", bytes), + ("luid_device_node_mask", int), + ("max_blocks_per_multi_processor", int), + ("shared_mem_per_block_optin", int), + ("cluster_launch", int), +] + + +driver_ver = handle_return(driver.cuDriverGetVersion()) +if driver_ver >= 12000: + cuda_base_properties += cuda_12_properties + + +@pytest.mark.parametrize("property_name, expected_type", cuda_base_properties) +def test_device_property_types(property_name, expected_type): device = Device() - assert isinstance(device.properties.name, str) - assert isinstance(device.properties.uuid, bytes) - assert isinstance(device.properties.total_global_mem, int) - assert isinstance(device.properties.shared_mem_per_block, int) - assert isinstance(device.properties.regs_per_block, int) - assert isinstance(device.properties.warp_size, int) - assert isinstance(device.properties.mem_pitch, int) - assert isinstance(device.properties.max_threads_per_block, int) - assert isinstance(device.properties.max_threads_dim, tuple) - assert isinstance(device.properties.max_grid_size, tuple) - assert isinstance(device.properties.clock_rate, int) - assert isinstance(device.properties.total_const_mem, int) - assert isinstance(device.properties.major, int) - assert isinstance(device.properties.minor, int) - assert isinstance(device.properties.texture_alignment, int) - assert isinstance(device.properties.texture_pitch_alignment, int) - assert isinstance(device.properties.device_overlap, int) - assert isinstance(device.properties.multi_processor_count, int) - assert isinstance(device.properties.kernel_exec_timeout_enabled, int) - assert isinstance(device.properties.integrated, int) - assert isinstance(device.properties.can_map_host_memory, int) - assert isinstance(device.properties.compute_mode, int) - assert isinstance(device.properties.max_texture_1d, int) - assert isinstance(device.properties.max_texture_1d_mipmap, int) - assert isinstance(device.properties.max_texture_1d_linear, int) - assert isinstance(device.properties.max_texture_2d, tuple) - assert isinstance(device.properties.max_texture_2d_mipmap, tuple) - assert isinstance(device.properties.max_texture_2d_linear, tuple) - assert isinstance(device.properties.max_texture_2d_gather, tuple) - assert isinstance(device.properties.max_texture_3d, tuple) - assert isinstance(device.properties.max_texture_3d_alt, tuple) - assert isinstance(device.properties.max_texture_cubemap, int) - assert isinstance(device.properties.max_texture_1d_layered, tuple) - assert isinstance(device.properties.max_texture_2d_layered, tuple) - assert isinstance(device.properties.max_texture_cubemap_layered, tuple) - assert isinstance(device.properties.max_surface_1d, int) - assert isinstance(device.properties.max_surface_2d, tuple) - assert isinstance(device.properties.max_surface_3d, tuple) - assert isinstance(device.properties.max_surface_1d_layered, tuple) - assert isinstance(device.properties.max_surface_2d_layered, tuple) - assert isinstance(device.properties.max_surface_cubemap, int) - assert isinstance(device.properties.max_surface_cubemap_layered, tuple) - assert isinstance(device.properties.surface_alignment, int) - assert isinstance(device.properties.concurrent_kernels, int) - assert isinstance(device.properties.ecc_enabled, int) - assert isinstance(device.properties.pci_bus_id, int) - assert isinstance(device.properties.pci_device_id, int) - assert isinstance(device.properties.pci_domain_id, int) - assert isinstance(device.properties.tcc_driver, int) - assert isinstance(device.properties.async_engine_count, int) - assert isinstance(device.properties.unified_addressing, int) - assert isinstance(device.properties.memory_clock_rate, int) - assert isinstance(device.properties.memory_bus_width, int) - assert isinstance(device.properties.l2_cache_size, int) - assert isinstance(device.properties.persisting_l2_cache_max_size, int) - assert isinstance(device.properties.max_threads_per_multi_processor, int) - assert isinstance(device.properties.stream_priorities_supported, int) - assert isinstance(device.properties.global_l1_cache_supported, int) - assert isinstance(device.properties.local_l1_cache_supported, int) - assert isinstance(device.properties.shared_mem_per_multiprocessor, int) - assert isinstance(device.properties.regs_per_multiprocessor, int) - assert isinstance(device.properties.managed_memory, int) - assert isinstance(device.properties.is_multi_gpu_board, int) - assert isinstance(device.properties.multi_gpu_board_group_id, int) - assert isinstance(device.properties.single_to_double_precision_perf_ratio, int) - assert isinstance(device.properties.pageable_memory_access, int) - assert isinstance(device.properties.concurrent_managed_access, int) - assert isinstance(device.properties.compute_preemption_supported, int) - assert isinstance(device.properties.can_use_host_pointer_for_registered_mem, int) - assert isinstance(device.properties.cooperative_launch, int) - assert isinstance(device.properties.cooperative_multi_device_launch, int) - assert isinstance(device.properties.pageable_memory_access_uses_host_page_tables, int) - assert isinstance(device.properties.direct_managed_mem_access_from_host, int) - assert isinstance(device.properties.access_policy_max_window_size, int) - assert isinstance(device.properties.reserved_shared_mem_per_block, int) - assert isinstance(device.properties.host_register_supported, int) - assert isinstance(device.properties.sparse_cuda_array_supported, int) - assert isinstance(device.properties.host_register_read_only_supported, int) - assert isinstance(device.properties.timeline_semaphore_interop_supported, int) - assert isinstance(device.properties.memory_pools_supported, int) - assert isinstance(device.properties.gpu_direct_rdma_supported, int) - assert isinstance(device.properties.gpu_direct_rdma_flush_writes_options, int) - assert isinstance(device.properties.gpu_direct_rdma_writes_ordering, int) - assert isinstance(device.properties.memory_pool_supported_handle_types, int) - assert isinstance(device.properties.deferred_mapping_cuda_array_supported, int) - assert isinstance(device.properties.ipc_event_supported, int) - assert isinstance(device.properties.unified_function_pointers, int) + assert isinstance(getattr(device.properties, property_name), expected_type) From afb7ed8fd75f3a0f8de85eef963897f97c162a8f Mon Sep 17 00:00:00 2001 From: ksimpson Date: Fri, 17 Jan 2025 15:32:44 -0800 Subject: [PATCH 04/10] remove deprecated properties --- cuda_core/cuda/core/experimental/_device.py | 24 --------------------- cuda_core/tests/test_device.py | 8 ------- 2 files changed, 32 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index 82e5473c..1433cab5 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -56,25 +56,17 @@ class DeviceProperties: offset applied to texture fetches. texture_pitch_alignment : int Pitch alignment requirement for 2D texture references that are bound to pitched memory. - device_overlap : int - 1 if the device can concurrently copy memory between host and device while executing a kernel, or 0 if not. multi_processor_count : int Number of multiprocessors on the device. - kernel_exec_timeout_enabled : int - 1 if there is a run time limit for kernels executed on the device, or 0 if not. integrated : int 1 if the device is an integrated (motherboard) GPU and 0 if it is a discrete (card) component. can_map_host_memory : int 1 if the device can map host memory into the CUDA address space for use with cudaHostAlloc()/cudaHostGetDevicePointer(), or 0 if not. - compute_mode : int - Compute mode that the device is currently in. max_texture_1d : int Maximum 1D texture size. max_texture_1d_mipmap : int Maximum 1D mipmapped texture size. - max_texture_1d_linear : int - Maximum 1D texture size for textures bound to linear memory. max_texture_2d : tuple Maximum 2D texture dimensions. max_texture_2d_mipmap : tuple @@ -130,8 +122,6 @@ class DeviceProperties: and execute a kernel at the same time. It is 0 if neither of these is supported. unified_addressing : int 1 if the device shares a unified address space with the host and 0 otherwise. - memory_clock_rate : int - Peak memory clock frequency in kilohertz. memory_bus_width : int Memory bus width in bits. l2_cache_size : int @@ -159,9 +149,6 @@ class DeviceProperties: multi_gpu_board_group_id : int Unique identifier for a group of devices associated with the same board. Devices on the same multi-GPU board will share the same identifier. - single_to_double_precision_perf_ratio : int - Ratio of single precision performance (in floating-point operations per second) to double precision - performance. pageable_memory_access : int 1 if the device supports coherently accessing pageable memory without calling cudaHostRegister on it, and 0 otherwise. @@ -173,9 +160,6 @@ class DeviceProperties: 1 if the device can access host registered memory at the same virtual address as the CPU, and 0 otherwise. cooperative_launch : int 1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernel, and 0 otherwise. - cooperative_multi_device_launch : int - 1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0 - otherwise. sharedMemPerBlockOptin : int The per device maximum shared memory per block usable by special opt in pageable_memory_access_uses_host_page_tables : int @@ -236,22 +220,17 @@ def _init(device_id): self.max_threads_per_block = prop.maxThreadsPerBlock self.max_threads_dim = tuple(prop.maxThreadsDim) self.max_grid_size = tuple(prop.maxGridSize) - self.clock_rate = prop.clockRate self.cluster_launch = prop.clusterLaunch self.total_const_mem = prop.totalConstMem self.major = prop.major self.minor = prop.minor self.texture_alignment = prop.textureAlignment self.texture_pitch_alignment = prop.texturePitchAlignment - self.device_overlap = prop.deviceOverlap self.multi_processor_count = prop.multiProcessorCount - self.kernel_exec_timeout_enabled = prop.kernelExecTimeoutEnabled self.integrated = prop.integrated self.can_map_host_memory = prop.canMapHostMemory - self.compute_mode = prop.computeMode self.max_texture_1d = prop.maxTexture1D self.max_texture_1d_mipmap = prop.maxTexture1DMipmap - self.max_texture_1d_linear = prop.maxTexture1DLinear self.max_texture_2d = tuple(prop.maxTexture2D) self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap) self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear) @@ -278,7 +257,6 @@ def _init(device_id): self.tcc_driver = prop.tccDriver self.async_engine_count = prop.asyncEngineCount self.unified_addressing = prop.unifiedAddressing - self.memory_clock_rate = prop.memoryClockRate self.memory_bus_width = prop.memoryBusWidth self.l2_cache_size = prop.l2CacheSize self.persisting_l2_cache_max_size = prop.persistingL2CacheMaxSize @@ -291,13 +269,11 @@ def _init(device_id): self.managed_memory = prop.managedMemory self.is_multi_gpu_board = prop.isMultiGpuBoard self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID - self.single_to_double_precision_perf_ratio = prop.singleToDoublePrecisionPerfRatio self.pageable_memory_access = prop.pageableMemoryAccess self.concurrent_managed_access = prop.concurrentManagedAccess self.compute_preemption_supported = prop.computePreemptionSupported self.can_use_host_pointer_for_registered_mem = prop.canUseHostPointerForRegisteredMem self.cooperative_launch = prop.cooperativeLaunch - self.cooperative_multi_device_launch = prop.cooperativeMultiDeviceLaunch self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin self.pageable_memory_access_uses_host_page_tables = prop.pageableMemoryAccessUsesHostPageTables self.direct_managed_mem_access_from_host = prop.directManagedMemAccessFromHost diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 2b0ffe39..4d7aa2cf 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -98,21 +98,16 @@ def test_device_property_values(): ("max_threads_per_block", int), ("max_threads_dim", tuple), ("max_grid_size", tuple), - ("clock_rate", int), ("total_const_mem", int), ("major", int), ("minor", int), ("texture_alignment", int), ("texture_pitch_alignment", int), - ("device_overlap", int), ("multi_processor_count", int), - ("kernel_exec_timeout_enabled", int), ("integrated", int), ("can_map_host_memory", int), - ("compute_mode", int), ("max_texture_1d", int), ("max_texture_1d_mipmap", int), - ("max_texture_1d_linear", int), ("max_texture_2d", tuple), ("max_texture_2d_mipmap", tuple), ("max_texture_2d_linear", tuple), @@ -139,7 +134,6 @@ def test_device_property_values(): ("tcc_driver", int), ("async_engine_count", int), ("unified_addressing", int), - ("memory_clock_rate", int), ("memory_bus_width", int), ("l2_cache_size", int), ("persisting_l2_cache_max_size", int), @@ -152,13 +146,11 @@ def test_device_property_values(): ("managed_memory", int), ("is_multi_gpu_board", int), ("multi_gpu_board_group_id", int), - ("single_to_double_precision_perf_ratio", int), ("pageable_memory_access", int), ("concurrent_managed_access", int), ("compute_preemption_supported", int), ("can_use_host_pointer_for_registered_mem", int), ("cooperative_launch", int), - ("cooperative_multi_device_launch", int), ("pageable_memory_access_uses_host_page_tables", int), ("direct_managed_mem_access_from_host", int), ("access_policy_max_window_size", int), From b2b2bafa35a3fea4ba6055472c20c5763b43b8ba Mon Sep 17 00:00:00 2001 From: ksimpson Date: Fri, 17 Jan 2025 15:40:34 -0800 Subject: [PATCH 05/10] convert to bools where appropriate --- cuda_core/cuda/core/experimental/_device.py | 148 ++++++++++++-------- cuda_core/tests/test_device.py | 58 ++++---- 2 files changed, 115 insertions(+), 91 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index 1433cab5..6b7e6ae4 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -28,6 +28,8 @@ class DeviceProperties: Total amount of global memory available on the device in bytes. shared_mem_per_block : int Maximum amount of shared memory available to a thread block in bytes. + shared_mem_per_block_optin : int + Maximum shared memory per block usable by special opt in regs_per_block : int Maximum number of 32-bit registers available to a thread block. warp_size : int @@ -43,8 +45,8 @@ class DeviceProperties: Maximum size of each dimension of a grid. clock_rate : int Clock frequency in kilohertz. - cluster_launch : int - Indicates device supports cluster launch + cluster_launch : bool + Indicates device supports cluster launch. total_const_mem : int Total amount of constant memory available on the device in bytes. major : int @@ -56,17 +58,25 @@ class DeviceProperties: offset applied to texture fetches. texture_pitch_alignment : int Pitch alignment requirement for 2D texture references that are bound to pitched memory. + device_overlap : bool + 1 if the device can concurrently copy memory between host and device while executing a kernel, or 0 if not. multi_processor_count : int Number of multiprocessors on the device. - integrated : int + kernel_exec_timeout_enabled : bool + 1 if there is a run time limit for kernels executed on the device, or 0 if not. + integrated : bool 1 if the device is an integrated (motherboard) GPU and 0 if it is a discrete (card) component. - can_map_host_memory : int + can_map_host_memory : bool 1 if the device can map host memory into the CUDA address space for use with cudaHostAlloc()/cudaHostGetDevicePointer(), or 0 if not. + compute_mode : int + Compute mode that the device is currently in. max_texture_1d : int Maximum 1D texture size. max_texture_1d_mipmap : int Maximum 1D mipmapped texture size. + max_texture_1d_linear : int + Maximum 1D texture size for textures bound to linear memory. max_texture_2d : tuple Maximum 2D texture dimensions. max_texture_2d_mipmap : tuple @@ -103,10 +113,10 @@ class DeviceProperties: Maximum cubemap layered surface dimensions. surface_alignment : int Alignment requirements for surfaces. - concurrent_kernels : int + concurrent_kernels : bool 1 if the device supports executing multiple kernels within the same context simultaneously, or 0 if not. - ecc_enabled : int + ecc_enabled : bool 1 if the device has ECC support turned on, or 0 if not. pci_bus_id : int PCI bus identifier of the device. @@ -114,14 +124,16 @@ class DeviceProperties: PCI device (sometimes called slot) identifier of the device. pci_domain_id : int PCI domain identifier of the device. - tcc_driver : int + tcc_driver : bool 1 if the device is using a TCC driver or 0 if not. async_engine_count : int 1 when the device can concurrently copy memory between host and device while executing a kernel. It is 2 when the device can concurrently copy memory between host and device in both directions and execute a kernel at the same time. It is 0 if neither of these is supported. - unified_addressing : int + unified_addressing : bool 1 if the device shares a unified address space with the host and 0 otherwise. + memory_clock_rate : int + Peak memory clock frequency in kilohertz. memory_bus_width : int Memory bus width in bits. l2_cache_size : int @@ -130,11 +142,11 @@ class DeviceProperties: L2 cache's maximum persisting lines size in bytes. max_threads_per_multi_processor : int Number of maximum resident threads per multiprocessor. - stream_priorities_supported : int + stream_priorities_supported : bool 1 if the device supports stream priorities, or 0 if it is not supported. - global_l1_cache_supported : int + global_l1_cache_supported : bool 1 if the device supports caching of globals in L1 cache, or 0 if it is not supported. - local_l1_cache_supported : int + local_l1_cache_supported : bool 1 if the device supports caching of locals in L1 cache, or 0 if it is not supported. shared_mem_per_multiprocessor : int Maximum amount of shared memory available to a multiprocessor in bytes; this amount is shared by all @@ -142,46 +154,50 @@ class DeviceProperties: regs_per_multiprocessor : int Maximum number of 32-bit registers available to a multiprocessor; this number is shared by all thread blocks simultaneously resident on a multiprocessor. - managed_memory : int + managed_memory : bool 1 if the device supports allocating managed memory on this system, or 0 if it is not supported. - is_multi_gpu_board : int + is_multi_gpu_board : bool 1 if the device is on a multi-GPU board (e.g. Gemini cards), and 0 if not. multi_gpu_board_group_id : int Unique identifier for a group of devices associated with the same board. Devices on the same multi-GPU board will share the same identifier. - pageable_memory_access : int + single_to_double_precision_perf_ratio : int + Ratio of single precision performance (in floating-point operations per second) to double precision + performance. + pageable_memory_access : bool 1 if the device supports coherently accessing pageable memory without calling cudaHostRegister on it, and 0 otherwise. - concurrent_managed_access : int + concurrent_managed_access : bool 1 if the device can coherently access managed memory concurrently with the CPU, and 0 otherwise. - compute_preemption_supported : int + compute_preemption_supported : bool 1 if the device supports Compute Preemption, and 0 otherwise. - can_use_host_pointer_for_registered_mem : int + can_use_host_pointer_for_registered_mem : bool 1 if the device can access host registered memory at the same virtual address as the CPU, and 0 otherwise. - cooperative_launch : int + cooperative_launch : bool 1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernel, and 0 otherwise. - sharedMemPerBlockOptin : int - The per device maximum shared memory per block usable by special opt in - pageable_memory_access_uses_host_page_tables : int + cooperative_multi_device_launch : bool + 1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0 + otherwise. + pageable_memory_access_uses_host_page_tables : bool 1 if the device accesses pageable memory via the host's page tables, and 0 otherwise. - direct_managed_mem_access_from_host : int + direct_managed_mem_access_from_host : bool 1 if the host can directly access managed memory on the device without migration, and 0 otherwise. access_policy_max_window_size : int Maximum value of cudaAccessPolicyWindow::num_bytes. reserved_shared_mem_per_block : int Shared memory reserved by CUDA driver per block in bytes. - host_register_supported : int + host_register_supported : bool 1 if the device supports host memory registration via cudaHostRegister, and 0 otherwise. - sparse_cuda_array_supported : int + sparse_cuda_array_supported : bool 1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise. - host_register_read_only_supported : int + host_register_read_only_supported : bool 1 if the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly to register memory that must be mapped as read-only to the GPU. - timeline_semaphore_interop_supported : int + timeline_semaphore_interop_supported : bool 1 if external timeline semaphore interop is supported on the device, 0 otherwise. - memory_pools_supported : int + memory_pools_supported : bool 1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise. - gpu_direct_rdma_supported : int + gpu_direct_rdma_supported : bool 1 if the device supports GPUDirect RDMA APIs, 0 otherwise. gpu_direct_rdma_flush_writes_options : int Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum. @@ -189,13 +205,13 @@ class DeviceProperties: See the cudaGPUDirectRDMAWritesOrdering enum for numerical values. memory_pool_supported_handle_types : int Bitmask of handle types supported with mempool-based IPC. - deferred_mapping_cuda_array_supported : int + deferred_mapping_cuda_array_supported : bool 1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays. - ipc_event_supported : int + ipc_event_supported : bool 1 if the device supports IPC Events, and 0 otherwise. - unified_function_pointers : int + unified_function_pointers : bool 1 if the device supports unified pointers, and 0 otherwise. - host_native_atomic_supported : int + host_native_atomic_supported : bool 1 if the link between the device and the host supports native atomic operations. luid : bytes 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms. @@ -214,23 +230,29 @@ def _init(device_id): self.uuid = prop.uuid.bytes self.total_global_mem = prop.totalGlobalMem self.shared_mem_per_block = prop.sharedMemPerBlock + self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin self.regs_per_block = prop.regsPerBlock self.warp_size = prop.warpSize self.mem_pitch = prop.memPitch self.max_threads_per_block = prop.maxThreadsPerBlock self.max_threads_dim = tuple(prop.maxThreadsDim) self.max_grid_size = tuple(prop.maxGridSize) - self.cluster_launch = prop.clusterLaunch + self.clock_rate = prop.clockRate + self.cluster_launch = bool(prop.clusterLaunch) self.total_const_mem = prop.totalConstMem self.major = prop.major self.minor = prop.minor self.texture_alignment = prop.textureAlignment self.texture_pitch_alignment = prop.texturePitchAlignment + self.device_overlap = bool(prop.deviceOverlap) self.multi_processor_count = prop.multiProcessorCount - self.integrated = prop.integrated - self.can_map_host_memory = prop.canMapHostMemory + self.kernel_exec_timeout_enabled = bool(prop.kernelExecTimeoutEnabled) + self.integrated = bool(prop.integrated) + self.can_map_host_memory = bool(prop.canMapHostMemory) + self.compute_mode = prop.computeMode self.max_texture_1d = prop.maxTexture1D self.max_texture_1d_mipmap = prop.maxTexture1DMipmap + self.max_texture_1d_linear = prop.maxTexture1DLinear self.max_texture_2d = tuple(prop.maxTexture2D) self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap) self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear) @@ -249,49 +271,51 @@ def _init(device_id): self.max_surface_cubemap = prop.maxSurfaceCubemap self.max_surface_cubemap_layered = tuple(prop.maxSurfaceCubemapLayered) self.surface_alignment = prop.surfaceAlignment - self.concurrent_kernels = prop.concurrentKernels - self.ecc_enabled = prop.ECCEnabled + self.concurrent_kernels = bool(prop.concurrentKernels) + self.ecc_enabled = bool(prop.ECCEnabled) self.pci_bus_id = prop.pciBusID self.pci_device_id = prop.pciDeviceID self.pci_domain_id = prop.pciDomainID - self.tcc_driver = prop.tccDriver + self.tcc_driver = bool(prop.tccDriver) self.async_engine_count = prop.asyncEngineCount - self.unified_addressing = prop.unifiedAddressing + self.unified_addressing = bool(prop.unifiedAddressing) + self.memory_clock_rate = prop.memoryClockRate self.memory_bus_width = prop.memoryBusWidth self.l2_cache_size = prop.l2CacheSize self.persisting_l2_cache_max_size = prop.persistingL2CacheMaxSize self.max_threads_per_multi_processor = prop.maxThreadsPerMultiProcessor - self.stream_priorities_supported = prop.streamPrioritiesSupported - self.global_l1_cache_supported = prop.globalL1CacheSupported - self.local_l1_cache_supported = prop.localL1CacheSupported + self.stream_priorities_supported = bool(prop.streamPrioritiesSupported) + self.global_l1_cache_supported = bool(prop.globalL1CacheSupported) + self.local_l1_cache_supported = bool(prop.localL1CacheSupported) self.shared_mem_per_multiprocessor = prop.sharedMemPerMultiprocessor self.regs_per_multiprocessor = prop.regsPerMultiprocessor - self.managed_memory = prop.managedMemory - self.is_multi_gpu_board = prop.isMultiGpuBoard + self.managed_memory = bool(prop.managedMemory) + self.is_multi_gpu_board = bool(prop.isMultiGpuBoard) self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID - self.pageable_memory_access = prop.pageableMemoryAccess - self.concurrent_managed_access = prop.concurrentManagedAccess - self.compute_preemption_supported = prop.computePreemptionSupported - self.can_use_host_pointer_for_registered_mem = prop.canUseHostPointerForRegisteredMem - self.cooperative_launch = prop.cooperativeLaunch - self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin - self.pageable_memory_access_uses_host_page_tables = prop.pageableMemoryAccessUsesHostPageTables - self.direct_managed_mem_access_from_host = prop.directManagedMemAccessFromHost + self.single_to_double_precision_perf_ratio = prop.singleToDoublePrecisionPerfRatio + self.pageable_memory_access = bool(prop.pageableMemoryAccess) + self.concurrent_managed_access = bool(prop.concurrentManagedAccess) + self.compute_preemption_supported = bool(prop.computePreemptionSupported) + self.can_use_host_pointer_for_registered_mem = bool(prop.canUseHostPointerForRegisteredMem) + self.cooperative_launch = bool(prop.cooperativeLaunch) + self.cooperative_multi_device_launch = bool(prop.cooperativeMultiDeviceLaunch) + self.pageable_memory_access_uses_host_page_tables = bool(prop.pageableMemoryAccessUsesHostPageTables) + self.direct_managed_mem_access_from_host = bool(prop.directManagedMemAccessFromHost) self.access_policy_max_window_size = prop.accessPolicyMaxWindowSize self.reserved_shared_mem_per_block = prop.reservedSharedMemPerBlock - self.host_register_supported = prop.hostRegisterSupported - self.sparse_cuda_array_supported = prop.sparseCudaArraySupported - self.host_register_read_only_supported = prop.hostRegisterReadOnlySupported - self.timeline_semaphore_interop_supported = prop.timelineSemaphoreInteropSupported - self.memory_pools_supported = prop.memoryPoolsSupported - self.gpu_direct_rdma_supported = prop.gpuDirectRDMASupported + self.host_register_supported = bool(prop.hostRegisterSupported) + self.sparse_cuda_array_supported = bool(prop.sparseCudaArraySupported) + self.host_register_read_only_supported = bool(prop.hostRegisterReadOnlySupported) + self.timeline_semaphore_interop_supported = bool(prop.timelineSemaphoreInteropSupported) + self.memory_pools_supported = bool(prop.memoryPoolsSupported) + self.gpu_direct_rdma_supported = bool(prop.gpuDirectRDMASupported) self.gpu_direct_rdma_flush_writes_options = prop.gpuDirectRDMAFlushWritesOptions self.gpu_direct_rdma_writes_ordering = prop.gpuDirectRDMAWritesOrdering self.memory_pool_supported_handle_types = prop.memoryPoolSupportedHandleTypes - self.deferred_mapping_cuda_array_supported = prop.deferredMappingCudaArraySupported - self.ipc_event_supported = prop.ipcEventSupported - self.unified_function_pointers = prop.unifiedFunctionPointers - self.host_native_atomic_supported = prop.hostNativeAtomicSupported + self.deferred_mapping_cuda_array_supported = bool(prop.deferredMappingCudaArraySupported) + self.ipc_event_supported = bool(prop.ipcEventSupported) + self.unified_function_pointers = bool(prop.unifiedFunctionPointers) + self.host_native_atomic_supported = bool(prop.hostNativeAtomicSupported) self.luid = prop.luid self.luid_device_node_mask = prop.luidDeviceNodeMask self.max_blocks_per_multi_processor = prop.maxBlocksPerMultiProcessor diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 4d7aa2cf..0a441996 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -104,8 +104,8 @@ def test_device_property_values(): ("texture_alignment", int), ("texture_pitch_alignment", int), ("multi_processor_count", int), - ("integrated", int), - ("can_map_host_memory", int), + ("integrated", bool), + ("can_map_host_memory", bool), ("max_texture_1d", int), ("max_texture_1d_mipmap", int), ("max_texture_2d", tuple), @@ -126,56 +126,56 @@ def test_device_property_values(): ("max_surface_cubemap", int), ("max_surface_cubemap_layered", tuple), ("surface_alignment", int), - ("concurrent_kernels", int), - ("ecc_enabled", int), + ("concurrent_kernels", bool), + ("ecc_enabled", bool), ("pci_bus_id", int), ("pci_device_id", int), ("pci_domain_id", int), - ("tcc_driver", int), + ("tcc_driver", bool), ("async_engine_count", int), - ("unified_addressing", int), + ("unified_addressing", bool), ("memory_bus_width", int), ("l2_cache_size", int), ("persisting_l2_cache_max_size", int), ("max_threads_per_multi_processor", int), - ("stream_priorities_supported", int), - ("global_l1_cache_supported", int), - ("local_l1_cache_supported", int), + ("stream_priorities_supported", bool), + ("global_l1_cache_supported", bool), + ("local_l1_cache_supported", bool), ("shared_mem_per_multiprocessor", int), ("regs_per_multiprocessor", int), - ("managed_memory", int), - ("is_multi_gpu_board", int), + ("managed_memory", bool), + ("is_multi_gpu_board", bool), ("multi_gpu_board_group_id", int), - ("pageable_memory_access", int), - ("concurrent_managed_access", int), - ("compute_preemption_supported", int), - ("can_use_host_pointer_for_registered_mem", int), - ("cooperative_launch", int), - ("pageable_memory_access_uses_host_page_tables", int), - ("direct_managed_mem_access_from_host", int), + ("pageable_memory_access", bool), + ("concurrent_managed_access", bool), + ("compute_preemption_supported", bool), + ("can_use_host_pointer_for_registered_mem", bool), + ("cooperative_launch", bool), + ("pageable_memory_access_uses_host_page_tables", bool), + ("direct_managed_mem_access_from_host", bool), ("access_policy_max_window_size", int), ("reserved_shared_mem_per_block", int), - ("host_register_supported", int), - ("sparse_cuda_array_supported", int), - ("host_register_read_only_supported", int), - ("timeline_semaphore_interop_supported", int), - ("memory_pools_supported", int), - ("gpu_direct_rdma_supported", int), + ("host_register_supported", bool), + ("sparse_cuda_array_supported", bool), + ("host_register_read_only_supported", bool), + ("timeline_semaphore_interop_supported", bool), + ("memory_pools_supported", bool), + ("gpu_direct_rdma_supported", bool), ("gpu_direct_rdma_flush_writes_options", int), ("gpu_direct_rdma_writes_ordering", int), ("memory_pool_supported_handle_types", int), - ("deferred_mapping_cuda_array_supported", int), - ("ipc_event_supported", int), - ("unified_function_pointers", int), + ("deferred_mapping_cuda_array_supported", bool), + ("ipc_event_supported", bool), + ("unified_function_pointers", bool), ] cuda_12_properties = [ - ("host_native_atomic_supported", int), + ("host_native_atomic_supported", bool), ("luid", bytes), ("luid_device_node_mask", int), ("max_blocks_per_multi_processor", int), ("shared_mem_per_block_optin", int), - ("cluster_launch", int), + ("cluster_launch", bool), ] From 373331c21d7fca05409a242d60285b6377ec7208 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Fri, 17 Jan 2025 16:05:33 -0800 Subject: [PATCH 06/10] convert to bools where appropriate --- cuda_core/cuda/core/experimental/_device.py | 81 ++++++++++----------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index 6b7e6ae4..6ff5a8a7 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -28,8 +28,6 @@ class DeviceProperties: Total amount of global memory available on the device in bytes. shared_mem_per_block : int Maximum amount of shared memory available to a thread block in bytes. - shared_mem_per_block_optin : int - Maximum shared memory per block usable by special opt in regs_per_block : int Maximum number of 32-bit registers available to a thread block. warp_size : int @@ -46,7 +44,7 @@ class DeviceProperties: clock_rate : int Clock frequency in kilohertz. cluster_launch : bool - Indicates device supports cluster launch. + Indicates whether or not the device supports cluster launch. total_const_mem : int Total amount of constant memory available on the device in bytes. major : int @@ -59,16 +57,17 @@ class DeviceProperties: texture_pitch_alignment : int Pitch alignment requirement for 2D texture references that are bound to pitched memory. device_overlap : bool - 1 if the device can concurrently copy memory between host and device while executing a kernel, or 0 if not. + Indicates whether or not the device can concurrently copy memory between host and device while executing + a kernel. multi_processor_count : int Number of multiprocessors on the device. kernel_exec_timeout_enabled : bool - 1 if there is a run time limit for kernels executed on the device, or 0 if not. + Indicates whether or not there is a run time limit for kernels executed on the device. integrated : bool - 1 if the device is an integrated (motherboard) GPU and 0 if it is a discrete (card) component. + Indicates whether or not the device is an integrated (motherboard) GPU. can_map_host_memory : bool - 1 if the device can map host memory into the CUDA address space for use with - cudaHostAlloc()/cudaHostGetDevicePointer(), or 0 if not. + Indicates whether or not the device can map host memory into the CUDA address space for use with + cudaHostAlloc()/cudaHostGetDevicePointer(). compute_mode : int Compute mode that the device is currently in. max_texture_1d : int @@ -114,10 +113,10 @@ class DeviceProperties: surface_alignment : int Alignment requirements for surfaces. concurrent_kernels : bool - 1 if the device supports executing multiple kernels within the same context simultaneously, or 0 if - not. + Indicates whether or not the device supports executing multiple kernels within the same context + simultaneously. ecc_enabled : bool - 1 if the device has ECC support turned on, or 0 if not. + Indicates whether or not the device has ECC support turned on. pci_bus_id : int PCI bus identifier of the device. pci_device_id : int @@ -125,13 +124,13 @@ class DeviceProperties: pci_domain_id : int PCI domain identifier of the device. tcc_driver : bool - 1 if the device is using a TCC driver or 0 if not. + Indicates whether or not the device is using a TCC driver. async_engine_count : int 1 when the device can concurrently copy memory between host and device while executing a kernel. It is 2 when the device can concurrently copy memory between host and device in both directions and execute a kernel at the same time. It is 0 if neither of these is supported. unified_addressing : bool - 1 if the device shares a unified address space with the host and 0 otherwise. + Indicates whether or not the device shares a unified address space with the host. memory_clock_rate : int Peak memory clock frequency in kilohertz. memory_bus_width : int @@ -143,11 +142,11 @@ class DeviceProperties: max_threads_per_multi_processor : int Number of maximum resident threads per multiprocessor. stream_priorities_supported : bool - 1 if the device supports stream priorities, or 0 if it is not supported. + Indicates whether or not the device supports stream priorities. global_l1_cache_supported : bool - 1 if the device supports caching of globals in L1 cache, or 0 if it is not supported. + Indicates whether or not the device supports caching of globals in L1 cache. local_l1_cache_supported : bool - 1 if the device supports caching of locals in L1 cache, or 0 if it is not supported. + Indicates whether or not the device supports caching of locals in L1 cache. shared_mem_per_multiprocessor : int Maximum amount of shared memory available to a multiprocessor in bytes; this amount is shared by all thread blocks simultaneously resident on a multiprocessor. @@ -155,9 +154,9 @@ class DeviceProperties: Maximum number of 32-bit registers available to a multiprocessor; this number is shared by all thread blocks simultaneously resident on a multiprocessor. managed_memory : bool - 1 if the device supports allocating managed memory on this system, or 0 if it is not supported. + Indicates whether or not the device supports allocating managed memory on this system. is_multi_gpu_board : bool - 1 if the device is on a multi-GPU board (e.g. Gemini cards), and 0 if not. + Indicates whether or not the device is on a multi-GPU board (e.g. Gemini cards). multi_gpu_board_group_id : int Unique identifier for a group of devices associated with the same board. Devices on the same multi-GPU board will share the same identifier. @@ -165,40 +164,43 @@ class DeviceProperties: Ratio of single precision performance (in floating-point operations per second) to double precision performance. pageable_memory_access : bool - 1 if the device supports coherently accessing pageable memory without calling cudaHostRegister on it, - and 0 otherwise. + Indicates whether or not the device supports coherently accessing pageable memory without calling + cudaHostRegister on it. concurrent_managed_access : bool - 1 if the device can coherently access managed memory concurrently with the CPU, and 0 otherwise. + Indicates whether or not the device can coherently access managed memory concurrently with the CPU. compute_preemption_supported : bool - 1 if the device supports Compute Preemption, and 0 otherwise. + Indicates whether or not the device supports Compute Preemption. can_use_host_pointer_for_registered_mem : bool - 1 if the device can access host registered memory at the same virtual address as the CPU, and 0 otherwise. + Indicates whether or not the device can access host registered memory at the same virtual address as + the CPU. cooperative_launch : bool - 1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernel, and 0 otherwise. + Indicates whether or not the device supports launching cooperative kernels via + cudaLaunchCooperativeKernel. cooperative_multi_device_launch : bool - 1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0 - otherwise. + Indicates whether or not the device supports launching cooperative kernels via + cudaLaunchCooperativeKernelMultiDevice. pageable_memory_access_uses_host_page_tables : bool - 1 if the device accesses pageable memory via the host's page tables, and 0 otherwise. + Indicates whether or not the device accesses pageable memory via the host's page tables. direct_managed_mem_access_from_host : bool - 1 if the host can directly access managed memory on the device without migration, and 0 otherwise. + Indicates whether or not the host can directly access managed memory on the device without migration. access_policy_max_window_size : int Maximum value of cudaAccessPolicyWindow::num_bytes. reserved_shared_mem_per_block : int Shared memory reserved by CUDA driver per block in bytes. host_register_supported : bool - 1 if the device supports host memory registration via cudaHostRegister, and 0 otherwise. + Indicates whether or not the device supports host memory registration via cudaHostRegister. sparse_cuda_array_supported : bool - 1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise. + Indicates whether or not the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays. host_register_read_only_supported : bool - 1 if the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly to register memory + Indicates whether or not the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly + to register memory that must be mapped as read-only to the GPU. timeline_semaphore_interop_supported : bool - 1 if external timeline semaphore interop is supported on the device, 0 otherwise. + Indicates whether or not external timeline semaphore interop is supported on the device. memory_pools_supported : bool - 1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise. + Indicates whether or not the device supports using the cudaMallocAsync and cudaMemPool family of APIs. gpu_direct_rdma_supported : bool - 1 if the device supports GPUDirect RDMA APIs, 0 otherwise. + Indicates whether or not the device supports GPUDirect RDMA APIs. gpu_direct_rdma_flush_writes_options : int Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum. gpu_direct_rdma_writes_ordering : int @@ -206,13 +208,13 @@ class DeviceProperties: memory_pool_supported_handle_types : int Bitmask of handle types supported with mempool-based IPC. deferred_mapping_cuda_array_supported : bool - 1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays. + Indicates whether or not the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays. ipc_event_supported : bool - 1 if the device supports IPC Events, and 0 otherwise. + Indicates whether or not the device supports IPC Events. unified_function_pointers : bool - 1 if the device supports unified pointers, and 0 otherwise. + Indicates whether or not the device supports unified pointers. host_native_atomic_supported : bool - 1 if the link between the device and the host supports native atomic operations. + Indicates whether or not the link between the device and the host supports native atomic operations. luid : bytes 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms. luid_device_node_mask : int @@ -230,7 +232,6 @@ def _init(device_id): self.uuid = prop.uuid.bytes self.total_global_mem = prop.totalGlobalMem self.shared_mem_per_block = prop.sharedMemPerBlock - self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin self.regs_per_block = prop.regsPerBlock self.warp_size = prop.warpSize self.mem_pitch = prop.memPitch @@ -252,7 +253,6 @@ def _init(device_id): self.compute_mode = prop.computeMode self.max_texture_1d = prop.maxTexture1D self.max_texture_1d_mipmap = prop.maxTexture1DMipmap - self.max_texture_1d_linear = prop.maxTexture1DLinear self.max_texture_2d = tuple(prop.maxTexture2D) self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap) self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear) @@ -292,7 +292,6 @@ def _init(device_id): self.managed_memory = bool(prop.managedMemory) self.is_multi_gpu_board = bool(prop.isMultiGpuBoard) self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID - self.single_to_double_precision_perf_ratio = prop.singleToDoublePrecisionPerfRatio self.pageable_memory_access = bool(prop.pageableMemoryAccess) self.concurrent_managed_access = bool(prop.concurrentManagedAccess) self.compute_preemption_supported = bool(prop.computePreemptionSupported) From 9fe5633b22f1192f4c98733ac238a4c35f9d752c Mon Sep 17 00:00:00 2001 From: ksimpson Date: Wed, 22 Jan 2025 15:06:50 -0800 Subject: [PATCH 07/10] hook into attributes instead of the prop struct --- cuda_core/cuda/core/experimental/_device.py | 1307 ++++++++++++++----- 1 file changed, 1006 insertions(+), 301 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index 6ff5a8a7..37987395 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -14,314 +14,1019 @@ _tls_lock = threading.Lock() +# ruff: noqa class DeviceProperties: """ - Represents the properties of a CUDA device. - - Attributes - ---------- - name : str - ASCII string identifying the device. - uuid : cudaUUID_t - 16-byte unique identifier. - total_global_mem : int - Total amount of global memory available on the device in bytes. - shared_mem_per_block : int - Maximum amount of shared memory available to a thread block in bytes. - regs_per_block : int - Maximum number of 32-bit registers available to a thread block. - warp_size : int - Warp size in threads. - mem_pitch : int - Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through - cudaMallocPitch(). - max_threads_per_block : int - Maximum number of threads per block. - max_threads_dim : tuple - Maximum size of each dimension of a block. - max_grid_size : tuple - Maximum size of each dimension of a grid. - clock_rate : int - Clock frequency in kilohertz. - cluster_launch : bool - Indicates whether or not the device supports cluster launch. - total_const_mem : int - Total amount of constant memory available on the device in bytes. - major : int - Major revision number defining the device's compute capability. - minor : int - Minor revision number defining the device's compute capability. - texture_alignment : int - Alignment requirement; texture base addresses that are aligned to textureAlignment bytes do not need an - offset applied to texture fetches. - texture_pitch_alignment : int - Pitch alignment requirement for 2D texture references that are bound to pitched memory. - device_overlap : bool - Indicates whether or not the device can concurrently copy memory between host and device while executing - a kernel. - multi_processor_count : int - Number of multiprocessors on the device. - kernel_exec_timeout_enabled : bool - Indicates whether or not there is a run time limit for kernels executed on the device. - integrated : bool - Indicates whether or not the device is an integrated (motherboard) GPU. - can_map_host_memory : bool - Indicates whether or not the device can map host memory into the CUDA address space for use with - cudaHostAlloc()/cudaHostGetDevicePointer(). - compute_mode : int - Compute mode that the device is currently in. - max_texture_1d : int - Maximum 1D texture size. - max_texture_1d_mipmap : int - Maximum 1D mipmapped texture size. - max_texture_1d_linear : int - Maximum 1D texture size for textures bound to linear memory. - max_texture_2d : tuple - Maximum 2D texture dimensions. - max_texture_2d_mipmap : tuple - Maximum 2D mipmapped texture dimensions. - max_texture_2d_linear : tuple - Maximum 2D texture dimensions for 2D textures bound to pitch linear memory. - max_texture_2d_gather : tuple - Maximum 2D texture dimensions if texture gather operations have to be performed. - max_texture_3d : tuple - Maximum 3D texture dimensions. - max_texture_3d_alt : tuple - Maximum alternate 3D texture dimensions. - max_texture_cubemap : int - Maximum cubemap texture width or height. - max_texture_1d_layered : tuple - Maximum 1D layered texture dimensions. - max_texture_2d_layered : tuple - Maximum 2D layered texture dimensions. - max_texture_cubemap_layered : tuple - Maximum cubemap layered texture dimensions. - max_surface_1d : int - Maximum 1D surface size. - max_surface_2d : tuple - Maximum 2D surface dimensions. - max_surface_3d : tuple - Maximum 3D surface dimensions. - max_surface_1d_layered : tuple - Maximum 1D layered surface dimensions. - max_surface_2d_layered : tuple - Maximum 2D layered surface dimensions. - max_surface_cubemap : int - Maximum cubemap surface width or height. - max_surface_cubemap_layered : tuple - Maximum cubemap layered surface dimensions. - surface_alignment : int - Alignment requirements for surfaces. - concurrent_kernels : bool - Indicates whether or not the device supports executing multiple kernels within the same context - simultaneously. - ecc_enabled : bool - Indicates whether or not the device has ECC support turned on. - pci_bus_id : int - PCI bus identifier of the device. - pci_device_id : int - PCI device (sometimes called slot) identifier of the device. - pci_domain_id : int - PCI domain identifier of the device. - tcc_driver : bool - Indicates whether or not the device is using a TCC driver. - async_engine_count : int - 1 when the device can concurrently copy memory between host and device while executing a kernel. - It is 2 when the device can concurrently copy memory between host and device in both directions - and execute a kernel at the same time. It is 0 if neither of these is supported. - unified_addressing : bool - Indicates whether or not the device shares a unified address space with the host. - memory_clock_rate : int - Peak memory clock frequency in kilohertz. - memory_bus_width : int - Memory bus width in bits. - l2_cache_size : int - L2 cache size in bytes. - persisting_l2_cache_max_size : int - L2 cache's maximum persisting lines size in bytes. - max_threads_per_multi_processor : int - Number of maximum resident threads per multiprocessor. - stream_priorities_supported : bool - Indicates whether or not the device supports stream priorities. - global_l1_cache_supported : bool - Indicates whether or not the device supports caching of globals in L1 cache. - local_l1_cache_supported : bool - Indicates whether or not the device supports caching of locals in L1 cache. - shared_mem_per_multiprocessor : int - Maximum amount of shared memory available to a multiprocessor in bytes; this amount is shared by all - thread blocks simultaneously resident on a multiprocessor. - regs_per_multiprocessor : int - Maximum number of 32-bit registers available to a multiprocessor; this number is shared by all thread - blocks simultaneously resident on a multiprocessor. - managed_memory : bool - Indicates whether or not the device supports allocating managed memory on this system. - is_multi_gpu_board : bool - Indicates whether or not the device is on a multi-GPU board (e.g. Gemini cards). - multi_gpu_board_group_id : int - Unique identifier for a group of devices associated with the same board. Devices on the same - multi-GPU board will share the same identifier. - single_to_double_precision_perf_ratio : int - Ratio of single precision performance (in floating-point operations per second) to double precision - performance. - pageable_memory_access : bool - Indicates whether or not the device supports coherently accessing pageable memory without calling - cudaHostRegister on it. - concurrent_managed_access : bool - Indicates whether or not the device can coherently access managed memory concurrently with the CPU. - compute_preemption_supported : bool - Indicates whether or not the device supports Compute Preemption. - can_use_host_pointer_for_registered_mem : bool - Indicates whether or not the device can access host registered memory at the same virtual address as - the CPU. - cooperative_launch : bool - Indicates whether or not the device supports launching cooperative kernels via - cudaLaunchCooperativeKernel. - cooperative_multi_device_launch : bool - Indicates whether or not the device supports launching cooperative kernels via - cudaLaunchCooperativeKernelMultiDevice. - pageable_memory_access_uses_host_page_tables : bool - Indicates whether or not the device accesses pageable memory via the host's page tables. - direct_managed_mem_access_from_host : bool - Indicates whether or not the host can directly access managed memory on the device without migration. - access_policy_max_window_size : int - Maximum value of cudaAccessPolicyWindow::num_bytes. - reserved_shared_mem_per_block : int - Shared memory reserved by CUDA driver per block in bytes. - host_register_supported : bool - Indicates whether or not the device supports host memory registration via cudaHostRegister. - sparse_cuda_array_supported : bool - Indicates whether or not the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays. - host_register_read_only_supported : bool - Indicates whether or not the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly - to register memory - that must be mapped as read-only to the GPU. - timeline_semaphore_interop_supported : bool - Indicates whether or not external timeline semaphore interop is supported on the device. - memory_pools_supported : bool - Indicates whether or not the device supports using the cudaMallocAsync and cudaMemPool family of APIs. - gpu_direct_rdma_supported : bool - Indicates whether or not the device supports GPUDirect RDMA APIs. - gpu_direct_rdma_flush_writes_options : int - Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum. - gpu_direct_rdma_writes_ordering : int - See the cudaGPUDirectRDMAWritesOrdering enum for numerical values. - memory_pool_supported_handle_types : int - Bitmask of handle types supported with mempool-based IPC. - deferred_mapping_cuda_array_supported : bool - Indicates whether or not the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays. - ipc_event_supported : bool - Indicates whether or not the device supports IPC Events. - unified_function_pointers : bool - Indicates whether or not the device supports unified pointers. - host_native_atomic_supported : bool - Indicates whether or not the link between the device and the host supports native atomic operations. - luid : bytes - 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms. - luid_device_node_mask : int - LUID device node mask. Value is undefined on TCC and non-Windows platforms. - max_blocks_per_multi_processor : int - Maximum number of resident blocks per multiprocessor. + A class to query various attributes of a CUDA device. + + Attributes are read-only and provide information about the device. + + Attributes: + max_threads_per_block (int): Maximum number of threads per block. + max_block_dim_x (int): Maximum x-dimension of a block. + max_block_dim_y (int): Maximum y-dimension of a block. + max_block_dim_z (int): Maximum z-dimension of a block. + max_grid_dim_x (int): Maximum x-dimension of a grid. + max_grid_dim_y (int): Maximum y-dimension of a grid. + max_grid_dim_z (int): Maximum z-dimension of a grid. + max_shared_memory_per_block (int): Maximum amount of shared memory available to a thread block in bytes. + total_constant_memory (int): Memory available on device for __constant__ variables in a CUDA C kernel in bytes. + warp_size (int): Warp size in threads. + max_pitch (int): Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through cuMemAllocPitch(). + maximum_texture1d_width (int): Maximum 1D texture width. + maximum_texture1d_linear_width (int): Maximum width for a 1D texture bound to linear memory. + maximum_texture1d_mipmapped_width (int): Maximum mipmapped 1D texture width. + maximum_texture2d_width (int): Maximum 2D texture width. + maximum_texture2d_height (int): Maximum 2D texture height. + maximum_texture2d_linear_width (int): Maximum width for a 2D texture bound to linear memory. + maximum_texture2d_linear_height (int): Maximum height for a 2D texture bound to linear memory. + maximum_texture2d_linear_pitch (int): Maximum pitch in bytes for a 2D texture bound to linear memory. + maximum_texture2d_mipmapped_width (int): Maximum mipmapped 2D texture width. + maximum_texture2d_mipmapped_height (int): Maximum mipmapped 2D texture height. + maximum_texture3d_width (int): Maximum 3D texture width. + maximum_texture3d_height (int): Maximum 3D texture height. + maximum_texture3d_depth (int): Maximum 3D texture depth. + maximum_texture3d_width_alternate (int): Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported. + maximum_texture3d_height_alternate (int): Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported. + maximum_texture3d_depth_alternate (int): Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported. + maximum_texturecubemap_width (int): Maximum cubemap texture width or height. + maximum_texture1d_layered_width (int): Maximum 1D layered texture width. + maximum_texture1d_layered_layers (int): Maximum layers in a 1D layered texture. + maximum_texture2d_layered_width (int): Maximum 2D layered texture width. + maximum_texture2d_layered_height (int): Maximum 2D layered texture height. + maximum_texture2d_layered_layers (int): Maximum layers in a 2D layered texture. + maximum_texturecubemap_layered_width (int): Maximum cubemap layered texture width or height. + maximum_texturecubemap_layered_layers (int): Maximum layers in a cubemap layered texture. + maximum_surface1d_width (int): Maximum 1D surface width. + maximum_surface2d_width (int): Maximum 2D surface width. + maximum_surface2d_height (int): Maximum 2D surface height. + maximum_surface3d_width (int): Maximum 3D surface width. + maximum_surface3d_height (int): Maximum 3D surface height. + maximum_surface3d_depth (int): Maximum 3D surface depth. + maximum_surface1d_layered_width (int): Maximum 1D layered surface width. + maximum_surface1d_layered_layers (int): Maximum layers in a 1D layered surface. + maximum_surface2d_layered_width (int): Maximum 2D layered surface width. + maximum_surface2d_layered_height (int): Maximum 2D layered surface height. + maximum_surface2d_layered_layers (int): Maximum layers in a 2D layered surface. + maximum_surfacecubemap_width (int): Maximum cubemap surface width. + maximum_surfacecubemap_layered_width (int): Maximum cubemap layered surface width. + maximum_surfacecubemap_layered_layers (int): Maximum layers in a cubemap layered surface. + max_registers_per_block (int): Maximum number of 32-bit registers available to a thread block. + clock_rate (int): The typical clock frequency in kilohertz. + texture_alignment (int): Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset applied to texture fetches. + texture_pitch_alignment (int): Pitch alignment requirement for 2D texture references bound to pitched memory. + gpu_overlap (bool): True if the device can concurrently copy memory between host and device while executing a kernel, False if not. + multiprocessor_count (int): Number of multiprocessors on the device. + kernel_exec_timeout (bool): True if there is a run time limit for kernels executed on the device, False if not. + integrated (bool): True if the device is integrated with the memory subsystem, False if not. + can_map_host_memory (bool): True if the device can map host memory into the CUDA address space, False if not. + compute_mode (int): Compute mode that device is currently in. + concurrent_kernels (bool): True if the device supports executing multiple kernels within the same context simultaneously, False if not. + ecc_enabled (bool): True if error correction is enabled on the device, False if error correction is disabled or not supported by the device. + pci_bus_id (int): PCI bus identifier of the device. + pci_device_id (int): PCI device (also known as slot) identifier of the device. + pci_domain_id (int): PCI domain identifier of the device. + tcc_driver (bool): True if the device is using a TCC driver, False if not. + memory_clock_rate (int): Peak memory clock frequency in kilohertz. + global_memory_bus_width (int): Global memory bus width in bits. + l2_cache_size (int): Size of L2 cache in bytes, 0 if the device doesn't have L2 cache. + max_threads_per_multiprocessor (int): Maximum resident threads per multiprocessor. + unified_addressing (bool): True if the device shares a unified address space with the host, False if not. + compute_capability_major (int): Major compute capability version number. + compute_capability_minor (int): Minor compute capability version number. + global_l1_cache_supported (bool): True if device supports caching globals in L1 cache, False if caching globals in L1 cache is not supported by the device. + local_l1_cache_supported (bool): True if device supports caching locals in L1 cache, False if caching locals in L1 cache is not supported by the device. + max_shared_memory_per_multiprocessor (int): Maximum amount of shared memory available to a multiprocessor in bytes. + max_registers_per_multiprocessor (int): Maximum number of 32-bit registers available to a multiprocessor. + managed_memory (bool): True if device supports allocating managed memory on this system, False if allocating managed memory is not supported by the device on this system. + multi_gpu_board (bool): True if device is on a multi-GPU board, False if not. + multi_gpu_board_group_id (int): Unique identifier for a group of devices associated with the same board. + host_native_atomic_supported (bool): True if Link between the device and the host supports native atomic operations, False if not. + single_to_double_precision_perf_ratio (int): Ratio of single precision performance (in floating-point operations per second) to double precision performance. + pageable_memory_access (bool): True if device supports coherently accessing pageable memory without calling cudaHostRegister on it, False if not. + concurrent_managed_access (bool): True if device can coherently access managed memory concurrently with the CPU, False if not. + compute_preemption_supported (bool): True if device supports Compute Preemption, False if not. + can_use_host_pointer_for_registered_mem (bool): True if device can access host registered memory at the same virtual address as the CPU, False if not. + max_shared_memory_per_block_optin (int): The maximum per block shared memory size supported on this device. + pageable_memory_access_uses_host_page_tables (bool): True if device accesses pageable memory via the host's page tables, False if not. + direct_managed_mem_access_from_host (bool): True if the host can directly access managed memory on the device without migration, False if not. + virtual_memory_management_supported (bool): True if device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap and related APIs, False if not. + handle_type_posix_file_descriptor_supported (bool): True if device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, False if not. + handle_type_win32_handle_supported (bool): True if device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, False if not. + handle_type_win32_kmt_handle_supported (bool): True if device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, False if not. + max_blocks_per_multiprocessor (int): Maximum number of thread blocks that can reside on a multiprocessor. + generic_compression_supported (bool): True if device supports compressible memory allocation via cuMemCreate, False if not. + max_persisting_l2_cache_size (int): Maximum L2 persisting lines capacity setting in bytes. + max_access_policy_window_size (int): Maximum value of CUaccessPolicyWindow::num_bytes. + gpu_direct_rdma_with_cuda_vmm_supported (bool): True if device supports specifying the GPUDirect RDMA flag with cuMemCreate, False if not. + reserved_shared_memory_per_block (int): Amount of shared memory per block reserved by CUDA driver in bytes. + sparse_cuda_array_supported (bool): True if device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, False if not. + read_only_host_register_supported (bool): True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not. + memory_pools_supported (bool): True if device supports using the cuMemAllocAsync and cuMemPool family of APIs, False if not. + gpu_direct_rdma_supported (bool): True if device supports GPUDirect RDMA APIs, False if not. + gpu_direct_rdma_flush_writes_options (int): The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the CUflushGPUDirectRDMAWritesOptions enum. + gpu_direct_rdma_writes_ordering (int): GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. + mempool_supported_handle_types (int): Bitmask of handle types supported with mempool based IPC. + deferred_mapping_cuda_array_supported (bool): True if device supports deferred mapping CUDA arrays and CUDA mipmapped arrays, False if not. + numa_config (int): NUMA configuration of a device. + numa_id (int): NUMA node ID of the GPU memory. + multicast_supported (bool): True if device supports switch multicast and reduction operations, False if not. """ - def _init(device_id): - self = DeviceProperties.__new__(DeviceProperties) + def __init__(self): + raise RuntimeError("DeviceProperties should not be instantiated directly") + + slots = "_handle" - prop = handle_return(runtime.cudaGetDeviceProperties(device_id)) - - self.name = prop.name.decode("utf-8") - self.uuid = prop.uuid.bytes - self.total_global_mem = prop.totalGlobalMem - self.shared_mem_per_block = prop.sharedMemPerBlock - self.regs_per_block = prop.regsPerBlock - self.warp_size = prop.warpSize - self.mem_pitch = prop.memPitch - self.max_threads_per_block = prop.maxThreadsPerBlock - self.max_threads_dim = tuple(prop.maxThreadsDim) - self.max_grid_size = tuple(prop.maxGridSize) - self.clock_rate = prop.clockRate - self.cluster_launch = bool(prop.clusterLaunch) - self.total_const_mem = prop.totalConstMem - self.major = prop.major - self.minor = prop.minor - self.texture_alignment = prop.textureAlignment - self.texture_pitch_alignment = prop.texturePitchAlignment - self.device_overlap = bool(prop.deviceOverlap) - self.multi_processor_count = prop.multiProcessorCount - self.kernel_exec_timeout_enabled = bool(prop.kernelExecTimeoutEnabled) - self.integrated = bool(prop.integrated) - self.can_map_host_memory = bool(prop.canMapHostMemory) - self.compute_mode = prop.computeMode - self.max_texture_1d = prop.maxTexture1D - self.max_texture_1d_mipmap = prop.maxTexture1DMipmap - self.max_texture_2d = tuple(prop.maxTexture2D) - self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap) - self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear) - self.max_texture_2d_gather = tuple(prop.maxTexture2DGather) - self.max_texture_3d = tuple(prop.maxTexture3D) - self.max_texture_3d_alt = tuple(prop.maxTexture3DAlt) - self.max_texture_cubemap = prop.maxTextureCubemap - self.max_texture_1d_layered = tuple(prop.maxTexture1DLayered) - self.max_texture_2d_layered = tuple(prop.maxTexture2DLayered) - self.max_texture_cubemap_layered = tuple(prop.maxTextureCubemapLayered) - self.max_surface_1d = prop.maxSurface1D - self.max_surface_2d = tuple(prop.maxSurface2D) - self.max_surface_3d = tuple(prop.maxSurface3D) - self.max_surface_1d_layered = tuple(prop.maxSurface1DLayered) - self.max_surface_2d_layered = tuple(prop.maxSurface2DLayered) - self.max_surface_cubemap = prop.maxSurfaceCubemap - self.max_surface_cubemap_layered = tuple(prop.maxSurfaceCubemapLayered) - self.surface_alignment = prop.surfaceAlignment - self.concurrent_kernels = bool(prop.concurrentKernels) - self.ecc_enabled = bool(prop.ECCEnabled) - self.pci_bus_id = prop.pciBusID - self.pci_device_id = prop.pciDeviceID - self.pci_domain_id = prop.pciDomainID - self.tcc_driver = bool(prop.tccDriver) - self.async_engine_count = prop.asyncEngineCount - self.unified_addressing = bool(prop.unifiedAddressing) - self.memory_clock_rate = prop.memoryClockRate - self.memory_bus_width = prop.memoryBusWidth - self.l2_cache_size = prop.l2CacheSize - self.persisting_l2_cache_max_size = prop.persistingL2CacheMaxSize - self.max_threads_per_multi_processor = prop.maxThreadsPerMultiProcessor - self.stream_priorities_supported = bool(prop.streamPrioritiesSupported) - self.global_l1_cache_supported = bool(prop.globalL1CacheSupported) - self.local_l1_cache_supported = bool(prop.localL1CacheSupported) - self.shared_mem_per_multiprocessor = prop.sharedMemPerMultiprocessor - self.regs_per_multiprocessor = prop.regsPerMultiprocessor - self.managed_memory = bool(prop.managedMemory) - self.is_multi_gpu_board = bool(prop.isMultiGpuBoard) - self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID - self.pageable_memory_access = bool(prop.pageableMemoryAccess) - self.concurrent_managed_access = bool(prop.concurrentManagedAccess) - self.compute_preemption_supported = bool(prop.computePreemptionSupported) - self.can_use_host_pointer_for_registered_mem = bool(prop.canUseHostPointerForRegisteredMem) - self.cooperative_launch = bool(prop.cooperativeLaunch) - self.cooperative_multi_device_launch = bool(prop.cooperativeMultiDeviceLaunch) - self.pageable_memory_access_uses_host_page_tables = bool(prop.pageableMemoryAccessUsesHostPageTables) - self.direct_managed_mem_access_from_host = bool(prop.directManagedMemAccessFromHost) - self.access_policy_max_window_size = prop.accessPolicyMaxWindowSize - self.reserved_shared_mem_per_block = prop.reservedSharedMemPerBlock - self.host_register_supported = bool(prop.hostRegisterSupported) - self.sparse_cuda_array_supported = bool(prop.sparseCudaArraySupported) - self.host_register_read_only_supported = bool(prop.hostRegisterReadOnlySupported) - self.timeline_semaphore_interop_supported = bool(prop.timelineSemaphoreInteropSupported) - self.memory_pools_supported = bool(prop.memoryPoolsSupported) - self.gpu_direct_rdma_supported = bool(prop.gpuDirectRDMASupported) - self.gpu_direct_rdma_flush_writes_options = prop.gpuDirectRDMAFlushWritesOptions - self.gpu_direct_rdma_writes_ordering = prop.gpuDirectRDMAWritesOrdering - self.memory_pool_supported_handle_types = prop.memoryPoolSupportedHandleTypes - self.deferred_mapping_cuda_array_supported = bool(prop.deferredMappingCudaArraySupported) - self.ipc_event_supported = bool(prop.ipcEventSupported) - self.unified_function_pointers = bool(prop.unifiedFunctionPointers) - self.host_native_atomic_supported = bool(prop.hostNativeAtomicSupported) - self.luid = prop.luid - self.luid_device_node_mask = prop.luidDeviceNodeMask - self.max_blocks_per_multi_processor = prop.maxBlocksPerMultiProcessor + def _init(handle): + self = DeviceProperties.__new__(DeviceProperties) + self._handle = handle return self - def __init__(self, device_id): - raise RuntimeError("DeviceProperties should not be instantiated directly") + @property + def max_threads_per_block(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, self._handle + ) + ) + + @property + def max_block_dim_x(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, self._handle) + ) + + @property + def max_block_dim_y(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, self._handle) + ) + + @property + def max_block_dim_z(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, self._handle) + ) + + @property + def max_grid_dim_x(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, self._handle) + ) + + @property + def max_grid_dim_y(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, self._handle) + ) + + @property + def max_grid_dim_z(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, self._handle) + ) + + @property + def max_shared_memory_per_block(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, self._handle + ) + ) + + @property + def total_constant_memory(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, self._handle + ) + ) + + @property + def warp_size(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE, self._handle) + ) + + @property + def max_pitch(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH, self._handle) + ) + + @property + def maximum_texture1d_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, self._handle + ) + ) + + @property + def maximum_texture1d_linear_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH, self._handle + ) + ) + + @property + def maximum_texture1d_mipmapped_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH, self._handle + ) + ) + + @property + def maximum_texture2d_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, self._handle + ) + ) + + @property + def maximum_texture2d_height(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, self._handle + ) + ) + + @property + def maximum_texture2d_linear_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, self._handle + ) + ) + + @property + def maximum_texture2d_linear_height(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, self._handle + ) + ) + + @property + def maximum_texture2d_linear_pitch(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, self._handle + ) + ) + + @property + def maximum_texture2d_mipmapped_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH, self._handle + ) + ) + + @property + def maximum_texture2d_mipmapped_height(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT, self._handle + ) + ) + + @property + def maximum_texture3d_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, self._handle + ) + ) + + @property + def maximum_texture3d_height(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, self._handle + ) + ) + + @property + def maximum_texture3d_depth(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, self._handle + ) + ) + + @property + def maximum_texture3d_width_alternate(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE, self._handle + ) + ) + + @property + def maximum_texture3d_height_alternate(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE, self._handle + ) + ) + + @property + def maximum_texture3d_depth_alternate(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE, self._handle + ) + ) + + @property + def maximum_texturecubemap_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH, self._handle + ) + ) + + @property + def maximum_texture1d_layered_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH, self._handle + ) + ) + + @property + def maximum_texture1d_layered_layers(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS, self._handle + ) + ) + + @property + def maximum_texture2d_layered_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH, self._handle + ) + ) + + @property + def maximum_texture2d_layered_height(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, self._handle + ) + ) + + @property + def maximum_texture2d_layered_layers(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS, self._handle + ) + ) + + @property + def maximum_texturecubemap_layered_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH, self._handle + ) + ) + + @property + def maximum_texturecubemap_layered_layers(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS, self._handle + ) + ) + + @property + def maximum_surface1d_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, self._handle + ) + ) + + @property + def maximum_surface2d_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, self._handle + ) + ) + + @property + def maximum_surface2d_height(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, self._handle + ) + ) + + @property + def maximum_surface3d_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, self._handle + ) + ) + + @property + def maximum_surface3d_height(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, self._handle + ) + ) + + @property + def maximum_surface3d_depth(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, self._handle + ) + ) + + @property + def maximum_surface1d_layered_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH, self._handle + ) + ) + + @property + def maximum_surface1d_layered_layers(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS, self._handle + ) + ) + + @property + def maximum_surface2d_layered_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH, self._handle + ) + ) + + @property + def maximum_surface2d_layered_height(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT, self._handle + ) + ) + + @property + def maximum_surface2d_layered_layers(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS, self._handle + ) + ) + + @property + def maximum_surfacecubemap_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH, self._handle + ) + ) + + @property + def maximum_surfacecubemap_layered_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH, self._handle + ) + ) + + @property + def maximum_surfacecubemap_layered_layers(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS, self._handle + ) + ) + + @property + def max_registers_per_block(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, self._handle + ) + ) + + @property + def clock_rate(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, self._handle) + ) + + @property + def texture_alignment(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, self._handle) + ) + + @property + def texture_pitch_alignment(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, self._handle + ) + ) + + @property + def gpu_overlap(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, self._handle) + ) + ) + + @property + def multiprocessor_count(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, self._handle + ) + ) + + @property + def kernel_exec_timeout(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, self._handle + ) + ) + ) + + @property + def integrated(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED, self._handle) + ) + ) + + @property + def can_map_host_memory(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, self._handle + ) + ) + ) + + @property + def compute_mode(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, self._handle) + ) + + @property + def concurrent_kernels(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, self._handle + ) + ) + ) + + @property + def ecc_enabled(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED, self._handle) + ) + ) + + @property + def pci_bus_id(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, self._handle) + ) + + @property + def pci_device_id(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, self._handle) + ) + + @property + def pci_domain_id(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, self._handle) + ) + + @property + def tcc_driver(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER, self._handle) + ) + ) + + @property + def memory_clock_rate(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, self._handle) + ) + + @property + def global_memory_bus_width(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, self._handle + ) + ) + + @property + def l2_cache_size(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, self._handle) + ) + + @property + def max_threads_per_multiprocessor(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, self._handle + ) + ) + + @property + def unified_addressing(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, self._handle + ) + ) + ) + + @property + def compute_capability_major(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, self._handle + ) + ) + + @property + def compute_capability_minor(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, self._handle + ) + ) + + @property + def global_l1_cache_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED, self._handle + ) + ) + ) + + @property + def local_l1_cache_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED, self._handle + ) + ) + ) + + @property + def max_shared_memory_per_multiprocessor(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, self._handle + ) + ) + + @property + def max_registers_per_multiprocessor(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, self._handle + ) + ) + + @property + def managed_memory(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, self._handle) + ) + ) + + @property + def multi_gpu_board(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, self._handle) + ) + ) + + @property + def multi_gpu_board_group_id(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID, self._handle + ) + ) + + @property + def host_native_atomic_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED, self._handle + ) + ) + ) + + @property + def single_to_double_precision_perf_ratio(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, self._handle + ) + ) + + @property + def pageable_memory_access(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, self._handle + ) + ) + ) + + @property + def concurrent_managed_access(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, self._handle + ) + ) + ) + + @property + def compute_preemption_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, self._handle + ) + ) + ) + + @property + def can_use_host_pointer_for_registered_mem(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, self._handle + ) + ) + ) + + @property + def max_shared_memory_per_block_optin(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, self._handle + ) + ) + + @property + def pageable_memory_access_uses_host_page_tables(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, + self._handle, + ) + ) + ) + + @property + def direct_managed_mem_access_from_host(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST, self._handle + ) + ) + ) + + @property + def virtual_memory_management_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, self._handle + ) + ) + ) + + @property + def handle_type_posix_file_descriptor_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED, + self._handle, + ) + ) + ) + + @property + def handle_type_win32_handle_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED, self._handle + ) + ) + ) + + @property + def handle_type_win32_kmt_handle_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED, self._handle + ) + ) + ) + + @property + def max_blocks_per_multiprocessor(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR, self._handle + ) + ) + + @property + def generic_compression_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED, self._handle + ) + ) + ) + + @property + def max_persisting_l2_cache_size(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE, self._handle + ) + ) + + @property + def max_access_policy_window_size(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE, self._handle + ) + ) + + @property + def gpu_direct_rdma_with_cuda_vmm_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED, self._handle + ) + ) + ) + + @property + def reserved_shared_memory_per_block(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK, self._handle + ) + ) + + @property + def sparse_cuda_array_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED, self._handle + ) + ) + ) + + @property + def read_only_host_register_supported(self): + return bool(handle_return(driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY))) + + @property + def memory_pools_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, self._handle + ) + ) + ) + + @property + def gpu_direct_rdma_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, self._handle + ) + ) + ) + + @property + def gpu_direct_rdma_flush_writes_options(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, self._handle + ) + ) + + @property + def gpu_direct_rdma_writes_ordering(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, self._handle + ) + ) + + @property + def mempool_supported_handle_types(self): + return handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES, self._handle + ) + ) + + @property + def deferred_mapping_cuda_array_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED, self._handle + ) + ) + ) + + @property + def numa_config(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG, self._handle) + ) + + @property + def numa_id(self): + return handle_return( + driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID, self._handle) + ) + + @property + def multicast_supported(self): + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED, self._handle + ) + ) + ) + + +# ruff: enable class Device: From 570ea62d6490a4ba397ab2ccd909ff6510c26ed8 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Thu, 23 Jan 2025 19:05:17 -0800 Subject: [PATCH 08/10] update test --- cuda_core/cuda/core/experimental/_device.py | 8 +- cuda_core/tests/test_device.py | 152 +++++++++++--------- 2 files changed, 88 insertions(+), 72 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index 37987395..df1da741 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -947,7 +947,13 @@ def sparse_cuda_array_supported(self): @property def read_only_host_register_supported(self): - return bool(handle_return(driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY))) + return bool( + handle_return( + driver.cuDeviceGetAttribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED, self._handle + ) + ) + ) @property def memory_pools_supported(self): diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 0a441996..30287a38 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -81,109 +81,119 @@ def test_compute_capability(): assert device.compute_capability == expected_cc -def test_device_property_values(): - device = Device() - assert device.properties.name == device.name - assert device.properties.uuid.hex() == device.uuid.replace("-", "") - - cuda_base_properties = [ - ("name", str), - ("uuid", bytes), - ("total_global_mem", int), - ("shared_mem_per_block", int), - ("regs_per_block", int), - ("warp_size", int), - ("mem_pitch", int), ("max_threads_per_block", int), - ("max_threads_dim", tuple), - ("max_grid_size", tuple), - ("total_const_mem", int), - ("major", int), - ("minor", int), + ("max_block_dim_x", int), + ("max_block_dim_y", int), + ("max_block_dim_z", int), + ("max_grid_dim_x", int), + ("max_grid_dim_y", int), + ("max_grid_dim_z", int), + ("max_shared_memory_per_block", int), + ("total_constant_memory", int), + ("warp_size", int), + ("max_pitch", int), + ("maximum_texture1d_width", int), + ("maximum_texture1d_linear_width", int), + ("maximum_texture1d_mipmapped_width", int), + ("maximum_texture2d_width", int), + ("maximum_texture2d_height", int), + ("maximum_texture2d_linear_width", int), + ("maximum_texture2d_linear_height", int), + ("maximum_texture2d_linear_pitch", int), + ("maximum_texture2d_mipmapped_width", int), + ("maximum_texture2d_mipmapped_height", int), + ("maximum_texture3d_width", int), + ("maximum_texture3d_height", int), + ("maximum_texture3d_depth", int), + ("maximum_texture3d_width_alternate", int), + ("maximum_texture3d_height_alternate", int), + ("maximum_texture3d_depth_alternate", int), + ("maximum_texturecubemap_width", int), + ("maximum_texture1d_layered_width", int), + ("maximum_texture1d_layered_layers", int), + ("maximum_texture2d_layered_width", int), + ("maximum_texture2d_layered_height", int), + ("maximum_texture2d_layered_layers", int), + ("maximum_texturecubemap_layered_width", int), + ("maximum_texturecubemap_layered_layers", int), + ("maximum_surface1d_width", int), + ("maximum_surface2d_width", int), + ("maximum_surface2d_height", int), + ("maximum_surface3d_width", int), + ("maximum_surface3d_height", int), + ("maximum_surface3d_depth", int), + ("maximum_surface1d_layered_width", int), + ("maximum_surface1d_layered_layers", int), + ("maximum_surface2d_layered_width", int), + ("maximum_surface2d_layered_height", int), + ("maximum_surface2d_layered_layers", int), + ("maximum_surfacecubemap_width", int), + ("maximum_surfacecubemap_layered_width", int), + ("maximum_surfacecubemap_layered_layers", int), + ("max_registers_per_block", int), + ("clock_rate", int), ("texture_alignment", int), ("texture_pitch_alignment", int), - ("multi_processor_count", int), + ("gpu_overlap", bool), + ("multiprocessor_count", int), + ("kernel_exec_timeout", bool), ("integrated", bool), ("can_map_host_memory", bool), - ("max_texture_1d", int), - ("max_texture_1d_mipmap", int), - ("max_texture_2d", tuple), - ("max_texture_2d_mipmap", tuple), - ("max_texture_2d_linear", tuple), - ("max_texture_2d_gather", tuple), - ("max_texture_3d", tuple), - ("max_texture_3d_alt", tuple), - ("max_texture_cubemap", int), - ("max_texture_1d_layered", tuple), - ("max_texture_2d_layered", tuple), - ("max_texture_cubemap_layered", tuple), - ("max_surface_1d", int), - ("max_surface_2d", tuple), - ("max_surface_3d", tuple), - ("max_surface_1d_layered", tuple), - ("max_surface_2d_layered", tuple), - ("max_surface_cubemap", int), - ("max_surface_cubemap_layered", tuple), - ("surface_alignment", int), + ("compute_mode", int), ("concurrent_kernels", bool), ("ecc_enabled", bool), ("pci_bus_id", int), ("pci_device_id", int), ("pci_domain_id", int), ("tcc_driver", bool), - ("async_engine_count", int), - ("unified_addressing", bool), - ("memory_bus_width", int), + ("memory_clock_rate", int), + ("global_memory_bus_width", int), ("l2_cache_size", int), - ("persisting_l2_cache_max_size", int), - ("max_threads_per_multi_processor", int), - ("stream_priorities_supported", bool), + ("max_threads_per_multiprocessor", int), + ("unified_addressing", bool), + ("compute_capability_major", int), + ("compute_capability_minor", int), ("global_l1_cache_supported", bool), ("local_l1_cache_supported", bool), - ("shared_mem_per_multiprocessor", int), - ("regs_per_multiprocessor", int), + ("max_shared_memory_per_multiprocessor", int), + ("max_registers_per_multiprocessor", int), ("managed_memory", bool), - ("is_multi_gpu_board", bool), + ("multi_gpu_board", bool), ("multi_gpu_board_group_id", int), + ("host_native_atomic_supported", bool), + ("single_to_double_precision_perf_ratio", int), ("pageable_memory_access", bool), ("concurrent_managed_access", bool), ("compute_preemption_supported", bool), ("can_use_host_pointer_for_registered_mem", bool), - ("cooperative_launch", bool), + ("max_shared_memory_per_block_optin", int), ("pageable_memory_access_uses_host_page_tables", bool), ("direct_managed_mem_access_from_host", bool), - ("access_policy_max_window_size", int), - ("reserved_shared_mem_per_block", int), - ("host_register_supported", bool), + ("virtual_memory_management_supported", bool), + ("handle_type_posix_file_descriptor_supported", bool), + ("handle_type_win32_handle_supported", bool), + ("handle_type_win32_kmt_handle_supported", bool), + ("max_blocks_per_multiprocessor", int), + ("generic_compression_supported", bool), + ("max_persisting_l2_cache_size", int), + ("max_access_policy_window_size", int), + ("gpu_direct_rdma_with_cuda_vmm_supported", bool), + ("reserved_shared_memory_per_block", int), ("sparse_cuda_array_supported", bool), - ("host_register_read_only_supported", bool), - ("timeline_semaphore_interop_supported", bool), + ("read_only_host_register_supported", bool), ("memory_pools_supported", bool), ("gpu_direct_rdma_supported", bool), ("gpu_direct_rdma_flush_writes_options", int), ("gpu_direct_rdma_writes_ordering", int), - ("memory_pool_supported_handle_types", int), + ("mempool_supported_handle_types", int), ("deferred_mapping_cuda_array_supported", bool), - ("ipc_event_supported", bool), - ("unified_function_pointers", bool), -] - -cuda_12_properties = [ - ("host_native_atomic_supported", bool), - ("luid", bytes), - ("luid_device_node_mask", int), - ("max_blocks_per_multi_processor", int), - ("shared_mem_per_block_optin", int), - ("cluster_launch", bool), + ("numa_config", int), + ("numa_id", int), + ("multicast_supported", bool), ] -driver_ver = handle_return(driver.cuDriverGetVersion()) -if driver_ver >= 12000: - cuda_base_properties += cuda_12_properties - - @pytest.mark.parametrize("property_name, expected_type", cuda_base_properties) def test_device_property_types(property_name, expected_type): device = Device() From 3e94c64b9efdb9b9e85fcd0bc8e549bfee2081cd Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 27 Jan 2025 12:35:55 -0800 Subject: [PATCH 09/10] add type hints and individual attribute docstrings --- cuda_core/cuda/core/experimental/_device.py | 678 +++++++++++++------- 1 file changed, 453 insertions(+), 225 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index df1da741..e2894ecc 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -14,123 +14,11 @@ _tls_lock = threading.Lock() -# ruff: noqa class DeviceProperties: """ A class to query various attributes of a CUDA device. Attributes are read-only and provide information about the device. - - Attributes: - max_threads_per_block (int): Maximum number of threads per block. - max_block_dim_x (int): Maximum x-dimension of a block. - max_block_dim_y (int): Maximum y-dimension of a block. - max_block_dim_z (int): Maximum z-dimension of a block. - max_grid_dim_x (int): Maximum x-dimension of a grid. - max_grid_dim_y (int): Maximum y-dimension of a grid. - max_grid_dim_z (int): Maximum z-dimension of a grid. - max_shared_memory_per_block (int): Maximum amount of shared memory available to a thread block in bytes. - total_constant_memory (int): Memory available on device for __constant__ variables in a CUDA C kernel in bytes. - warp_size (int): Warp size in threads. - max_pitch (int): Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through cuMemAllocPitch(). - maximum_texture1d_width (int): Maximum 1D texture width. - maximum_texture1d_linear_width (int): Maximum width for a 1D texture bound to linear memory. - maximum_texture1d_mipmapped_width (int): Maximum mipmapped 1D texture width. - maximum_texture2d_width (int): Maximum 2D texture width. - maximum_texture2d_height (int): Maximum 2D texture height. - maximum_texture2d_linear_width (int): Maximum width for a 2D texture bound to linear memory. - maximum_texture2d_linear_height (int): Maximum height for a 2D texture bound to linear memory. - maximum_texture2d_linear_pitch (int): Maximum pitch in bytes for a 2D texture bound to linear memory. - maximum_texture2d_mipmapped_width (int): Maximum mipmapped 2D texture width. - maximum_texture2d_mipmapped_height (int): Maximum mipmapped 2D texture height. - maximum_texture3d_width (int): Maximum 3D texture width. - maximum_texture3d_height (int): Maximum 3D texture height. - maximum_texture3d_depth (int): Maximum 3D texture depth. - maximum_texture3d_width_alternate (int): Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported. - maximum_texture3d_height_alternate (int): Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported. - maximum_texture3d_depth_alternate (int): Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported. - maximum_texturecubemap_width (int): Maximum cubemap texture width or height. - maximum_texture1d_layered_width (int): Maximum 1D layered texture width. - maximum_texture1d_layered_layers (int): Maximum layers in a 1D layered texture. - maximum_texture2d_layered_width (int): Maximum 2D layered texture width. - maximum_texture2d_layered_height (int): Maximum 2D layered texture height. - maximum_texture2d_layered_layers (int): Maximum layers in a 2D layered texture. - maximum_texturecubemap_layered_width (int): Maximum cubemap layered texture width or height. - maximum_texturecubemap_layered_layers (int): Maximum layers in a cubemap layered texture. - maximum_surface1d_width (int): Maximum 1D surface width. - maximum_surface2d_width (int): Maximum 2D surface width. - maximum_surface2d_height (int): Maximum 2D surface height. - maximum_surface3d_width (int): Maximum 3D surface width. - maximum_surface3d_height (int): Maximum 3D surface height. - maximum_surface3d_depth (int): Maximum 3D surface depth. - maximum_surface1d_layered_width (int): Maximum 1D layered surface width. - maximum_surface1d_layered_layers (int): Maximum layers in a 1D layered surface. - maximum_surface2d_layered_width (int): Maximum 2D layered surface width. - maximum_surface2d_layered_height (int): Maximum 2D layered surface height. - maximum_surface2d_layered_layers (int): Maximum layers in a 2D layered surface. - maximum_surfacecubemap_width (int): Maximum cubemap surface width. - maximum_surfacecubemap_layered_width (int): Maximum cubemap layered surface width. - maximum_surfacecubemap_layered_layers (int): Maximum layers in a cubemap layered surface. - max_registers_per_block (int): Maximum number of 32-bit registers available to a thread block. - clock_rate (int): The typical clock frequency in kilohertz. - texture_alignment (int): Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset applied to texture fetches. - texture_pitch_alignment (int): Pitch alignment requirement for 2D texture references bound to pitched memory. - gpu_overlap (bool): True if the device can concurrently copy memory between host and device while executing a kernel, False if not. - multiprocessor_count (int): Number of multiprocessors on the device. - kernel_exec_timeout (bool): True if there is a run time limit for kernels executed on the device, False if not. - integrated (bool): True if the device is integrated with the memory subsystem, False if not. - can_map_host_memory (bool): True if the device can map host memory into the CUDA address space, False if not. - compute_mode (int): Compute mode that device is currently in. - concurrent_kernels (bool): True if the device supports executing multiple kernels within the same context simultaneously, False if not. - ecc_enabled (bool): True if error correction is enabled on the device, False if error correction is disabled or not supported by the device. - pci_bus_id (int): PCI bus identifier of the device. - pci_device_id (int): PCI device (also known as slot) identifier of the device. - pci_domain_id (int): PCI domain identifier of the device. - tcc_driver (bool): True if the device is using a TCC driver, False if not. - memory_clock_rate (int): Peak memory clock frequency in kilohertz. - global_memory_bus_width (int): Global memory bus width in bits. - l2_cache_size (int): Size of L2 cache in bytes, 0 if the device doesn't have L2 cache. - max_threads_per_multiprocessor (int): Maximum resident threads per multiprocessor. - unified_addressing (bool): True if the device shares a unified address space with the host, False if not. - compute_capability_major (int): Major compute capability version number. - compute_capability_minor (int): Minor compute capability version number. - global_l1_cache_supported (bool): True if device supports caching globals in L1 cache, False if caching globals in L1 cache is not supported by the device. - local_l1_cache_supported (bool): True if device supports caching locals in L1 cache, False if caching locals in L1 cache is not supported by the device. - max_shared_memory_per_multiprocessor (int): Maximum amount of shared memory available to a multiprocessor in bytes. - max_registers_per_multiprocessor (int): Maximum number of 32-bit registers available to a multiprocessor. - managed_memory (bool): True if device supports allocating managed memory on this system, False if allocating managed memory is not supported by the device on this system. - multi_gpu_board (bool): True if device is on a multi-GPU board, False if not. - multi_gpu_board_group_id (int): Unique identifier for a group of devices associated with the same board. - host_native_atomic_supported (bool): True if Link between the device and the host supports native atomic operations, False if not. - single_to_double_precision_perf_ratio (int): Ratio of single precision performance (in floating-point operations per second) to double precision performance. - pageable_memory_access (bool): True if device supports coherently accessing pageable memory without calling cudaHostRegister on it, False if not. - concurrent_managed_access (bool): True if device can coherently access managed memory concurrently with the CPU, False if not. - compute_preemption_supported (bool): True if device supports Compute Preemption, False if not. - can_use_host_pointer_for_registered_mem (bool): True if device can access host registered memory at the same virtual address as the CPU, False if not. - max_shared_memory_per_block_optin (int): The maximum per block shared memory size supported on this device. - pageable_memory_access_uses_host_page_tables (bool): True if device accesses pageable memory via the host's page tables, False if not. - direct_managed_mem_access_from_host (bool): True if the host can directly access managed memory on the device without migration, False if not. - virtual_memory_management_supported (bool): True if device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap and related APIs, False if not. - handle_type_posix_file_descriptor_supported (bool): True if device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, False if not. - handle_type_win32_handle_supported (bool): True if device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, False if not. - handle_type_win32_kmt_handle_supported (bool): True if device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, False if not. - max_blocks_per_multiprocessor (int): Maximum number of thread blocks that can reside on a multiprocessor. - generic_compression_supported (bool): True if device supports compressible memory allocation via cuMemCreate, False if not. - max_persisting_l2_cache_size (int): Maximum L2 persisting lines capacity setting in bytes. - max_access_policy_window_size (int): Maximum value of CUaccessPolicyWindow::num_bytes. - gpu_direct_rdma_with_cuda_vmm_supported (bool): True if device supports specifying the GPUDirect RDMA flag with cuMemCreate, False if not. - reserved_shared_memory_per_block (int): Amount of shared memory per block reserved by CUDA driver in bytes. - sparse_cuda_array_supported (bool): True if device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, False if not. - read_only_host_register_supported (bool): True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not. - memory_pools_supported (bool): True if device supports using the cuMemAllocAsync and cuMemPool family of APIs, False if not. - gpu_direct_rdma_supported (bool): True if device supports GPUDirect RDMA APIs, False if not. - gpu_direct_rdma_flush_writes_options (int): The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the CUflushGPUDirectRDMAWritesOptions enum. - gpu_direct_rdma_writes_ordering (int): GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. - mempool_supported_handle_types (int): Bitmask of handle types supported with mempool based IPC. - deferred_mapping_cuda_array_supported (bool): True if device supports deferred mapping CUDA arrays and CUDA mipmapped arrays, False if not. - numa_config (int): NUMA configuration of a device. - numa_id (int): NUMA node ID of the GPU memory. - multicast_supported (bool): True if device supports switch multicast and reduction operations, False if not. """ def __init__(self): @@ -144,7 +32,10 @@ def _init(handle): return self @property - def max_threads_per_block(self): + def max_threads_per_block(self) -> int: + """ + int: Maximum number of threads per block. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, self._handle @@ -152,43 +43,64 @@ def max_threads_per_block(self): ) @property - def max_block_dim_x(self): + def max_block_dim_x(self) -> int: + """ + int: Maximum x-dimension of a block. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, self._handle) ) @property - def max_block_dim_y(self): + def max_block_dim_y(self) -> int: + """ + int: Maximum y-dimension of a block. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, self._handle) ) @property - def max_block_dim_z(self): + def max_block_dim_z(self) -> int: + """ + int: Maximum z-dimension of a block. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, self._handle) ) @property - def max_grid_dim_x(self): + def max_grid_dim_x(self) -> int: + """ + int: Maximum x-dimension of a grid. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, self._handle) ) @property - def max_grid_dim_y(self): + def max_grid_dim_y(self) -> int: + """ + int: Maximum y-dimension of a grid. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, self._handle) ) @property - def max_grid_dim_z(self): + def max_grid_dim_z(self) -> int: + """ + int: Maximum z-dimension of a grid. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, self._handle) ) @property - def max_shared_memory_per_block(self): + def max_shared_memory_per_block(self) -> int: + """ + int: Maximum amount of shared memory available to a thread block in bytes. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, self._handle @@ -196,7 +108,10 @@ def max_shared_memory_per_block(self): ) @property - def total_constant_memory(self): + def total_constant_memory(self) -> int: + """ + int: Memory available on device for __constant__ variables in a CUDA C kernel in bytes. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, self._handle @@ -204,19 +119,29 @@ def total_constant_memory(self): ) @property - def warp_size(self): + def warp_size(self) -> int: + """ + int: Warp size in threads. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE, self._handle) ) @property - def max_pitch(self): + def max_pitch(self) -> int: + """ + int: Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated + through cuMemAllocPitch(). + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH, self._handle) ) @property - def maximum_texture1d_width(self): + def maximum_texture1d_width(self) -> int: + """ + int: Maximum 1D texture width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, self._handle @@ -224,7 +149,10 @@ def maximum_texture1d_width(self): ) @property - def maximum_texture1d_linear_width(self): + def maximum_texture1d_linear_width(self) -> int: + """ + int: Maximum width for a 1D texture bound to linear memory. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH, self._handle @@ -232,7 +160,10 @@ def maximum_texture1d_linear_width(self): ) @property - def maximum_texture1d_mipmapped_width(self): + def maximum_texture1d_mipmapped_width(self) -> int: + """ + int: Maximum mipmapped 1D texture width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH, self._handle @@ -240,7 +171,10 @@ def maximum_texture1d_mipmapped_width(self): ) @property - def maximum_texture2d_width(self): + def maximum_texture2d_width(self) -> int: + """ + int: Maximum 2D texture width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, self._handle @@ -248,7 +182,10 @@ def maximum_texture2d_width(self): ) @property - def maximum_texture2d_height(self): + def maximum_texture2d_height(self) -> int: + """ + int: Maximum 2D texture height. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, self._handle @@ -256,7 +193,10 @@ def maximum_texture2d_height(self): ) @property - def maximum_texture2d_linear_width(self): + def maximum_texture2d_linear_width(self) -> int: + """ + int: Maximum width for a 2D texture bound to linear memory. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, self._handle @@ -264,7 +204,10 @@ def maximum_texture2d_linear_width(self): ) @property - def maximum_texture2d_linear_height(self): + def maximum_texture2d_linear_height(self) -> int: + """ + int: Maximum height for a 2D texture bound to linear memory. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, self._handle @@ -272,7 +215,10 @@ def maximum_texture2d_linear_height(self): ) @property - def maximum_texture2d_linear_pitch(self): + def maximum_texture2d_linear_pitch(self) -> int: + """ + int: Maximum pitch in bytes for a 2D texture bound to linear memory. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, self._handle @@ -280,7 +226,10 @@ def maximum_texture2d_linear_pitch(self): ) @property - def maximum_texture2d_mipmapped_width(self): + def maximum_texture2d_mipmapped_width(self) -> int: + """ + int: Maximum mipmapped 2D texture width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH, self._handle @@ -288,7 +237,10 @@ def maximum_texture2d_mipmapped_width(self): ) @property - def maximum_texture2d_mipmapped_height(self): + def maximum_texture2d_mipmapped_height(self) -> int: + """ + int: Maximum mipmapped 2D texture height. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT, self._handle @@ -296,7 +248,10 @@ def maximum_texture2d_mipmapped_height(self): ) @property - def maximum_texture3d_width(self): + def maximum_texture3d_width(self) -> int: + """ + int: Maximum 3D texture width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, self._handle @@ -304,7 +259,10 @@ def maximum_texture3d_width(self): ) @property - def maximum_texture3d_height(self): + def maximum_texture3d_height(self) -> int: + """ + int: Maximum 3D texture height. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, self._handle @@ -312,7 +270,10 @@ def maximum_texture3d_height(self): ) @property - def maximum_texture3d_depth(self): + def maximum_texture3d_depth(self) -> int: + """ + int: Maximum 3D texture depth. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, self._handle @@ -320,7 +281,10 @@ def maximum_texture3d_depth(self): ) @property - def maximum_texture3d_width_alternate(self): + def maximum_texture3d_width_alternate(self) -> int: + """ + int: Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE, self._handle @@ -328,7 +292,10 @@ def maximum_texture3d_width_alternate(self): ) @property - def maximum_texture3d_height_alternate(self): + def maximum_texture3d_height_alternate(self) -> int: + """ + int: Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE, self._handle @@ -336,7 +303,10 @@ def maximum_texture3d_height_alternate(self): ) @property - def maximum_texture3d_depth_alternate(self): + def maximum_texture3d_depth_alternate(self) -> int: + """ + int: Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE, self._handle @@ -344,7 +314,10 @@ def maximum_texture3d_depth_alternate(self): ) @property - def maximum_texturecubemap_width(self): + def maximum_texturecubemap_width(self) -> int: + """ + int: Maximum cubemap texture width or height. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH, self._handle @@ -352,7 +325,10 @@ def maximum_texturecubemap_width(self): ) @property - def maximum_texture1d_layered_width(self): + def maximum_texture1d_layered_width(self) -> int: + """ + int: Maximum 1D layered texture width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH, self._handle @@ -360,7 +336,10 @@ def maximum_texture1d_layered_width(self): ) @property - def maximum_texture1d_layered_layers(self): + def maximum_texture1d_layered_layers(self) -> int: + """ + int: Maximum layers in a 1D layered texture. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS, self._handle @@ -368,7 +347,10 @@ def maximum_texture1d_layered_layers(self): ) @property - def maximum_texture2d_layered_width(self): + def maximum_texture2d_layered_width(self) -> int: + """ + int: Maximum 2D layered texture width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH, self._handle @@ -376,7 +358,10 @@ def maximum_texture2d_layered_width(self): ) @property - def maximum_texture2d_layered_height(self): + def maximum_texture2d_layered_height(self) -> int: + """ + int: Maximum 2D layered texture height. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, self._handle @@ -384,7 +369,10 @@ def maximum_texture2d_layered_height(self): ) @property - def maximum_texture2d_layered_layers(self): + def maximum_texture2d_layered_layers(self) -> int: + """ + int: Maximum layers in a 2D layered texture. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS, self._handle @@ -392,7 +380,10 @@ def maximum_texture2d_layered_layers(self): ) @property - def maximum_texturecubemap_layered_width(self): + def maximum_texturecubemap_layered_width(self) -> int: + """ + int: Maximum cubemap layered texture width or height. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH, self._handle @@ -400,7 +391,10 @@ def maximum_texturecubemap_layered_width(self): ) @property - def maximum_texturecubemap_layered_layers(self): + def maximum_texturecubemap_layered_layers(self) -> int: + """ + int: Maximum layers in a cubemap layered texture. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS, self._handle @@ -408,7 +402,10 @@ def maximum_texturecubemap_layered_layers(self): ) @property - def maximum_surface1d_width(self): + def maximum_surface1d_width(self) -> int: + """ + int: Maximum 1D surface width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, self._handle @@ -416,7 +413,10 @@ def maximum_surface1d_width(self): ) @property - def maximum_surface2d_width(self): + def maximum_surface2d_width(self) -> int: + """ + int: Maximum 2D surface width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, self._handle @@ -424,7 +424,10 @@ def maximum_surface2d_width(self): ) @property - def maximum_surface2d_height(self): + def maximum_surface2d_height(self) -> int: + """ + int: Maximum 2D surface height. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, self._handle @@ -432,7 +435,10 @@ def maximum_surface2d_height(self): ) @property - def maximum_surface3d_width(self): + def maximum_surface3d_width(self) -> int: + """ + int: Maximum 3D surface width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, self._handle @@ -440,7 +446,10 @@ def maximum_surface3d_width(self): ) @property - def maximum_surface3d_height(self): + def maximum_surface3d_height(self) -> int: + """ + int: Maximum 3D surface height. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, self._handle @@ -448,7 +457,10 @@ def maximum_surface3d_height(self): ) @property - def maximum_surface3d_depth(self): + def maximum_surface3d_depth(self) -> int: + """ + int: Maximum 3D surface depth. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, self._handle @@ -456,7 +468,10 @@ def maximum_surface3d_depth(self): ) @property - def maximum_surface1d_layered_width(self): + def maximum_surface1d_layered_width(self) -> int: + """ + int: Maximum 1D layered surface width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH, self._handle @@ -464,7 +479,10 @@ def maximum_surface1d_layered_width(self): ) @property - def maximum_surface1d_layered_layers(self): + def maximum_surface1d_layered_layers(self) -> int: + """ + int: Maximum layers in a 1D layered surface. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS, self._handle @@ -472,7 +490,10 @@ def maximum_surface1d_layered_layers(self): ) @property - def maximum_surface2d_layered_width(self): + def maximum_surface2d_layered_width(self) -> int: + """ + int: Maximum 2D layered surface width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH, self._handle @@ -480,7 +501,10 @@ def maximum_surface2d_layered_width(self): ) @property - def maximum_surface2d_layered_height(self): + def maximum_surface2d_layered_height(self) -> int: + """ + int: Maximum 2D layered surface height. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT, self._handle @@ -488,7 +512,10 @@ def maximum_surface2d_layered_height(self): ) @property - def maximum_surface2d_layered_layers(self): + def maximum_surface2d_layered_layers(self) -> int: + """ + int: Maximum layers in a 2D layered surface. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS, self._handle @@ -496,7 +523,10 @@ def maximum_surface2d_layered_layers(self): ) @property - def maximum_surfacecubemap_width(self): + def maximum_surfacecubemap_width(self) -> int: + """ + int: Maximum cubemap surface width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH, self._handle @@ -504,7 +534,10 @@ def maximum_surfacecubemap_width(self): ) @property - def maximum_surfacecubemap_layered_width(self): + def maximum_surfacecubemap_layered_width(self) -> int: + """ + int: Maximum cubemap layered surface width. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH, self._handle @@ -512,7 +545,10 @@ def maximum_surfacecubemap_layered_width(self): ) @property - def maximum_surfacecubemap_layered_layers(self): + def maximum_surfacecubemap_layered_layers(self) -> int: + """ + int: Maximum layers in a cubemap layered surface. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS, self._handle @@ -520,7 +556,10 @@ def maximum_surfacecubemap_layered_layers(self): ) @property - def max_registers_per_block(self): + def max_registers_per_block(self) -> int: + """ + int: Maximum number of 32-bit registers available to a thread block. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, self._handle @@ -528,19 +567,29 @@ def max_registers_per_block(self): ) @property - def clock_rate(self): + def clock_rate(self) -> int: + """ + int: The typical clock frequency in kilohertz. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, self._handle) ) @property - def texture_alignment(self): + def texture_alignment(self) -> int: + """ + int: Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset applied + to texture fetches. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, self._handle) ) @property - def texture_pitch_alignment(self): + def texture_pitch_alignment(self) -> int: + """ + int: Pitch alignment requirement for 2D texture references bound to pitched memory. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, self._handle @@ -548,7 +597,11 @@ def texture_pitch_alignment(self): ) @property - def gpu_overlap(self): + def gpu_overlap(self) -> bool: + """ + bool: True if the device can concurrently copy memory between host and device while executing a kernel, False + if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, self._handle) @@ -556,7 +609,10 @@ def gpu_overlap(self): ) @property - def multiprocessor_count(self): + def multiprocessor_count(self) -> int: + """ + int: Number of multiprocessors on the device. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, self._handle @@ -564,7 +620,10 @@ def multiprocessor_count(self): ) @property - def kernel_exec_timeout(self): + def kernel_exec_timeout(self) -> bool: + """ + bool: True if there is a run time limit for kernels executed on the device, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -574,7 +633,10 @@ def kernel_exec_timeout(self): ) @property - def integrated(self): + def integrated(self) -> bool: + """ + bool: True if the device is integrated with the memory subsystem, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED, self._handle) @@ -582,7 +644,10 @@ def integrated(self): ) @property - def can_map_host_memory(self): + def can_map_host_memory(self) -> bool: + """ + True if the device can map host memory into the CUDA address space, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -592,13 +657,19 @@ def can_map_host_memory(self): ) @property - def compute_mode(self): + def compute_mode(self) -> int: + """ + Compute mode that device is currently in. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, self._handle) ) @property - def concurrent_kernels(self): + def concurrent_kernels(self) -> bool: + """ + True if the device supports executing multiple kernels within the same context simultaneously, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -608,7 +679,11 @@ def concurrent_kernels(self): ) @property - def ecc_enabled(self): + def ecc_enabled(self) -> bool: + """ + True if error correction is enabled on the device, False if error correction is disabled or not supported by + the device. + """ return bool( handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED, self._handle) @@ -616,25 +691,37 @@ def ecc_enabled(self): ) @property - def pci_bus_id(self): + def pci_bus_id(self) -> int: + """ + PCI bus identifier of the device. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, self._handle) ) @property - def pci_device_id(self): + def pci_device_id(self) -> int: + """ + PCI device (also known as slot) identifier of the device. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, self._handle) ) @property - def pci_domain_id(self): + def pci_domain_id(self) -> int: + """ + PCI domain identifier of the device. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, self._handle) ) @property - def tcc_driver(self): + def tcc_driver(self) -> bool: + """ + True if the device is using a TCC driver, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER, self._handle) @@ -642,13 +729,19 @@ def tcc_driver(self): ) @property - def memory_clock_rate(self): + def memory_clock_rate(self) -> int: + """ + Peak memory clock frequency in kilohertz. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, self._handle) ) @property - def global_memory_bus_width(self): + def global_memory_bus_width(self) -> int: + """ + Global memory bus width in bits. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, self._handle @@ -656,13 +749,19 @@ def global_memory_bus_width(self): ) @property - def l2_cache_size(self): + def l2_cache_size(self) -> int: + """ + Size of L2 cache in bytes, 0 if the device doesn't have L2 cache. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, self._handle) ) @property - def max_threads_per_multiprocessor(self): + def max_threads_per_multiprocessor(self) -> int: + """ + Maximum resident threads per multiprocessor. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, self._handle @@ -670,7 +769,10 @@ def max_threads_per_multiprocessor(self): ) @property - def unified_addressing(self): + def unified_addressing(self) -> bool: + """ + True if the device shares a unified address space with the host, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -680,7 +782,10 @@ def unified_addressing(self): ) @property - def compute_capability_major(self): + def compute_capability_major(self) -> int: + """ + Major compute capability version number. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, self._handle @@ -688,7 +793,10 @@ def compute_capability_major(self): ) @property - def compute_capability_minor(self): + def compute_capability_minor(self) -> int: + """ + Minor compute capability version number. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, self._handle @@ -696,7 +804,11 @@ def compute_capability_minor(self): ) @property - def global_l1_cache_supported(self): + def global_l1_cache_supported(self) -> bool: + """ + True if device supports caching globals in L1 cache, False if caching globals in L1 cache is not supported + by the device. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -706,7 +818,11 @@ def global_l1_cache_supported(self): ) @property - def local_l1_cache_supported(self): + def local_l1_cache_supported(self) -> bool: + """ + True if device supports caching locals in L1 cache, False if caching locals in L1 cache is not supported + by the device. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -716,7 +832,10 @@ def local_l1_cache_supported(self): ) @property - def max_shared_memory_per_multiprocessor(self): + def max_shared_memory_per_multiprocessor(self) -> int: + """ + Maximum amount of shared memory available to a multiprocessor in bytes. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, self._handle @@ -724,7 +843,10 @@ def max_shared_memory_per_multiprocessor(self): ) @property - def max_registers_per_multiprocessor(self): + def max_registers_per_multiprocessor(self) -> int: + """ + Maximum number of 32-bit registers available to a multiprocessor. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, self._handle @@ -732,7 +854,11 @@ def max_registers_per_multiprocessor(self): ) @property - def managed_memory(self): + def managed_memory(self) -> bool: + """ + True if device supports allocating managed memory on this system, False if allocating managed memory is not + supported by the device on this system. + """ return bool( handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, self._handle) @@ -740,7 +866,10 @@ def managed_memory(self): ) @property - def multi_gpu_board(self): + def multi_gpu_board(self) -> bool: + """ + True if device is on a multi-GPU board, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, self._handle) @@ -748,7 +877,10 @@ def multi_gpu_board(self): ) @property - def multi_gpu_board_group_id(self): + def multi_gpu_board_group_id(self) -> int: + """ + Unique identifier for a group of devices associated with the same board. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID, self._handle @@ -756,7 +888,10 @@ def multi_gpu_board_group_id(self): ) @property - def host_native_atomic_supported(self): + def host_native_atomic_supported(self) -> bool: + """ + True if Link between the device and the host supports native atomic operations, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -766,7 +901,11 @@ def host_native_atomic_supported(self): ) @property - def single_to_double_precision_perf_ratio(self): + def single_to_double_precision_perf_ratio(self) -> int: + """ + Ratio of single precision performance (in floating-point operations per second) to double precision + performance. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, self._handle @@ -774,7 +913,11 @@ def single_to_double_precision_perf_ratio(self): ) @property - def pageable_memory_access(self): + def pageable_memory_access(self) -> bool: + """ + True if device supports coherently accessing pageable memory without calling cudaHostRegister on it, + False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -784,7 +927,10 @@ def pageable_memory_access(self): ) @property - def concurrent_managed_access(self): + def concurrent_managed_access(self) -> bool: + """ + True if device can coherently access managed memory concurrently with the CPU, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -794,7 +940,10 @@ def concurrent_managed_access(self): ) @property - def compute_preemption_supported(self): + def compute_preemption_supported(self) -> bool: + """ + True if device supports Compute Preemption, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -804,7 +953,10 @@ def compute_preemption_supported(self): ) @property - def can_use_host_pointer_for_registered_mem(self): + def can_use_host_pointer_for_registered_mem(self) -> bool: + """ + True if device can access host registered memory at the same virtual address as the CPU, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -814,7 +966,10 @@ def can_use_host_pointer_for_registered_mem(self): ) @property - def max_shared_memory_per_block_optin(self): + def max_shared_memory_per_block_optin(self) -> int: + """ + The maximum per block shared memory size supported on this device. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, self._handle @@ -822,7 +977,10 @@ def max_shared_memory_per_block_optin(self): ) @property - def pageable_memory_access_uses_host_page_tables(self): + def pageable_memory_access_uses_host_page_tables(self) -> bool: + """ + True if device accesses pageable memory via the host's page tables, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -833,7 +991,10 @@ def pageable_memory_access_uses_host_page_tables(self): ) @property - def direct_managed_mem_access_from_host(self): + def direct_managed_mem_access_from_host(self) -> bool: + """ + True if the host can directly access managed memory on the device without migration, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -843,7 +1004,11 @@ def direct_managed_mem_access_from_host(self): ) @property - def virtual_memory_management_supported(self): + def virtual_memory_management_supported(self) -> bool: + """ + True if device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap + and related APIs, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -853,7 +1018,11 @@ def virtual_memory_management_supported(self): ) @property - def handle_type_posix_file_descriptor_supported(self): + def handle_type_posix_file_descriptor_supported(self) -> bool: + """ + True if device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, + False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -864,7 +1033,11 @@ def handle_type_posix_file_descriptor_supported(self): ) @property - def handle_type_win32_handle_supported(self): + def handle_type_win32_handle_supported(self) -> bool: + """ + True if device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, + False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -874,7 +1047,11 @@ def handle_type_win32_handle_supported(self): ) @property - def handle_type_win32_kmt_handle_supported(self): + def handle_type_win32_kmt_handle_supported(self) -> bool: + """ + True if device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, + False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -884,7 +1061,10 @@ def handle_type_win32_kmt_handle_supported(self): ) @property - def max_blocks_per_multiprocessor(self): + def max_blocks_per_multiprocessor(self) -> int: + """ + Maximum number of thread blocks that can reside on a multiprocessor. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR, self._handle @@ -892,7 +1072,10 @@ def max_blocks_per_multiprocessor(self): ) @property - def generic_compression_supported(self): + def generic_compression_supported(self) -> bool: + """ + True if device supports compressible memory allocation via cuMemCreate, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -902,7 +1085,10 @@ def generic_compression_supported(self): ) @property - def max_persisting_l2_cache_size(self): + def max_persisting_l2_cache_size(self) -> int: + """ + Maximum L2 persisting lines capacity setting in bytes. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE, self._handle @@ -910,7 +1096,10 @@ def max_persisting_l2_cache_size(self): ) @property - def max_access_policy_window_size(self): + def max_access_policy_window_size(self) -> int: + """ + Maximum value of CUaccessPolicyWindow::num_bytes. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE, self._handle @@ -918,7 +1107,10 @@ def max_access_policy_window_size(self): ) @property - def gpu_direct_rdma_with_cuda_vmm_supported(self): + def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool: + """ + True if device supports specifying the GPUDirect RDMA flag with cuMemCreate, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -928,7 +1120,10 @@ def gpu_direct_rdma_with_cuda_vmm_supported(self): ) @property - def reserved_shared_memory_per_block(self): + def reserved_shared_memory_per_block(self) -> int: + """ + Amount of shared memory per block reserved by CUDA driver in bytes. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK, self._handle @@ -936,7 +1131,10 @@ def reserved_shared_memory_per_block(self): ) @property - def sparse_cuda_array_supported(self): + def sparse_cuda_array_supported(self) -> bool: + """ + True if device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -946,7 +1144,11 @@ def sparse_cuda_array_supported(self): ) @property - def read_only_host_register_supported(self): + def read_only_host_register_supported(self) -> bool: + """ + True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register + memory that must be mapped as read-only to the GPU, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -956,7 +1158,10 @@ def read_only_host_register_supported(self): ) @property - def memory_pools_supported(self): + def memory_pools_supported(self) -> bool: + """ + True if device supports using the cuMemAllocAsync and cuMemPool family of APIs, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -966,7 +1171,10 @@ def memory_pools_supported(self): ) @property - def gpu_direct_rdma_supported(self): + def gpu_direct_rdma_supported(self) -> bool: + """ + True if device supports GPUDirect RDMA APIs, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -976,7 +1184,11 @@ def gpu_direct_rdma_supported(self): ) @property - def gpu_direct_rdma_flush_writes_options(self): + def gpu_direct_rdma_flush_writes_options(self) -> int: + """ + The returned attribute shall be interpreted as a bitmask, where the individual bits are described by + the CUflushGPUDirectRDMAWritesOptions enum. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, self._handle @@ -984,7 +1196,11 @@ def gpu_direct_rdma_flush_writes_options(self): ) @property - def gpu_direct_rdma_writes_ordering(self): + def gpu_direct_rdma_writes_ordering(self) -> int: + """ + GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated + by the returned attribute. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, self._handle @@ -992,7 +1208,10 @@ def gpu_direct_rdma_writes_ordering(self): ) @property - def mempool_supported_handle_types(self): + def mempool_supported_handle_types(self) -> int: + """ + Bitmask of handle types supported with mempool based IPC. + """ return handle_return( driver.cuDeviceGetAttribute( driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES, self._handle @@ -1000,7 +1219,10 @@ def mempool_supported_handle_types(self): ) @property - def deferred_mapping_cuda_array_supported(self): + def deferred_mapping_cuda_array_supported(self) -> bool: + """ + True if device supports deferred mapping CUDA arrays and CUDA mipmapped arrays, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -1010,19 +1232,28 @@ def deferred_mapping_cuda_array_supported(self): ) @property - def numa_config(self): + def numa_config(self) -> int: + """ + NUMA configuration of a device. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG, self._handle) ) @property - def numa_id(self): + def numa_id(self) -> int: + """ + NUMA node ID of the GPU memory. + """ return handle_return( driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID, self._handle) ) @property - def multicast_supported(self): + def multicast_supported(self) -> bool: + """ + True if device supports switch multicast and reduction operations, False if not. + """ return bool( handle_return( driver.cuDeviceGetAttribute( @@ -1032,9 +1263,6 @@ def multicast_supported(self): ) -# ruff: enable - - class Device: """Represent a GPU and act as an entry point for cuda.core features. @@ -1147,7 +1375,7 @@ def name(self) -> str: @property def properties(self) -> DeviceProperties: - """Return information about the compute-device.""" + """Return a :obj:`~_device.DeviceProperties` class with information about the device.""" if self._properties is None: self._properties = DeviceProperties._init(self._id) From 9f9512592f2228b048bf5ff49f7ae6a1e2b4b71f Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 28 Jan 2025 09:37:12 -0800 Subject: [PATCH 10/10] address comments --- cuda_core/cuda/core/experimental/_device.py | 696 ++++---------------- cuda_core/tests/test_device.py | 8 + 2 files changed, 144 insertions(+), 560 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index e2894ecc..cfbda8ef 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -24,108 +24,85 @@ class DeviceProperties: def __init__(self): raise RuntimeError("DeviceProperties should not be instantiated directly") - slots = "_handle" + __slots__ = "_handle" def _init(handle): self = DeviceProperties.__new__(DeviceProperties) self._handle = handle return self + def _get_attribute(self, dev_attr): + return handle_return(driver.cuDeviceGetAttribute(dev_attr, self._handle)) + @property def max_threads_per_block(self) -> int: """ int: Maximum number of threads per block. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK) @property def max_block_dim_x(self) -> int: """ int: Maximum x-dimension of a block. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X) @property def max_block_dim_y(self) -> int: """ int: Maximum y-dimension of a block. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y) @property def max_block_dim_z(self) -> int: """ int: Maximum z-dimension of a block. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z) @property def max_grid_dim_x(self) -> int: """ int: Maximum x-dimension of a grid. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X) @property def max_grid_dim_y(self) -> int: """ int: Maximum y-dimension of a grid. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y) @property def max_grid_dim_z(self) -> int: """ int: Maximum z-dimension of a grid. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z) @property def max_shared_memory_per_block(self) -> int: """ int: Maximum amount of shared memory available to a thread block in bytes. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK) @property def total_constant_memory(self) -> int: """ int: Memory available on device for __constant__ variables in a CUDA C kernel in bytes. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY) @property def warp_size(self) -> int: """ int: Warp size in threads. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE) @property def max_pitch(self) -> int: @@ -133,675 +110,438 @@ def max_pitch(self) -> int: int: Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through cuMemAllocPitch(). """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH) @property def maximum_texture1d_width(self) -> int: """ int: Maximum 1D texture width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH) @property def maximum_texture1d_linear_width(self) -> int: """ int: Maximum width for a 1D texture bound to linear memory. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH) @property def maximum_texture1d_mipmapped_width(self) -> int: """ int: Maximum mipmapped 1D texture width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH) @property def maximum_texture2d_width(self) -> int: """ int: Maximum 2D texture width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH) @property def maximum_texture2d_height(self) -> int: """ int: Maximum 2D texture height. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT) @property def maximum_texture2d_linear_width(self) -> int: """ int: Maximum width for a 2D texture bound to linear memory. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH) @property def maximum_texture2d_linear_height(self) -> int: """ int: Maximum height for a 2D texture bound to linear memory. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT) @property def maximum_texture2d_linear_pitch(self) -> int: """ int: Maximum pitch in bytes for a 2D texture bound to linear memory. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH) @property def maximum_texture2d_mipmapped_width(self) -> int: """ int: Maximum mipmapped 2D texture width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH) @property def maximum_texture2d_mipmapped_height(self) -> int: """ int: Maximum mipmapped 2D texture height. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT) @property def maximum_texture3d_width(self) -> int: """ int: Maximum 3D texture width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH) @property def maximum_texture3d_height(self) -> int: """ int: Maximum 3D texture height. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT) @property def maximum_texture3d_depth(self) -> int: """ int: Maximum 3D texture depth. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH) @property def maximum_texture3d_width_alternate(self) -> int: """ int: Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE) @property def maximum_texture3d_height_alternate(self) -> int: """ int: Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE) @property def maximum_texture3d_depth_alternate(self) -> int: """ int: Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE) @property def maximum_texturecubemap_width(self) -> int: """ int: Maximum cubemap texture width or height. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH) @property def maximum_texture1d_layered_width(self) -> int: """ int: Maximum 1D layered texture width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH) @property def maximum_texture1d_layered_layers(self) -> int: """ int: Maximum layers in a 1D layered texture. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS) @property def maximum_texture2d_layered_width(self) -> int: """ int: Maximum 2D layered texture width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH) @property def maximum_texture2d_layered_height(self) -> int: """ int: Maximum 2D layered texture height. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT) @property def maximum_texture2d_layered_layers(self) -> int: """ int: Maximum layers in a 2D layered texture. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS) @property def maximum_texturecubemap_layered_width(self) -> int: """ int: Maximum cubemap layered texture width or height. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH) @property def maximum_texturecubemap_layered_layers(self) -> int: """ int: Maximum layers in a cubemap layered texture. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS) @property def maximum_surface1d_width(self) -> int: """ int: Maximum 1D surface width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH) @property def maximum_surface2d_width(self) -> int: """ int: Maximum 2D surface width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH) @property def maximum_surface2d_height(self) -> int: """ int: Maximum 2D surface height. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT) @property def maximum_surface3d_width(self) -> int: """ int: Maximum 3D surface width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH) @property def maximum_surface3d_height(self) -> int: """ int: Maximum 3D surface height. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT) @property def maximum_surface3d_depth(self) -> int: """ int: Maximum 3D surface depth. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH) @property def maximum_surface1d_layered_width(self) -> int: """ int: Maximum 1D layered surface width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH) @property def maximum_surface1d_layered_layers(self) -> int: """ int: Maximum layers in a 1D layered surface. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS) @property def maximum_surface2d_layered_width(self) -> int: """ int: Maximum 2D layered surface width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH) @property def maximum_surface2d_layered_height(self) -> int: """ int: Maximum 2D layered surface height. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT) @property def maximum_surface2d_layered_layers(self) -> int: """ int: Maximum layers in a 2D layered surface. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS) @property def maximum_surfacecubemap_width(self) -> int: """ int: Maximum cubemap surface width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH) @property def maximum_surfacecubemap_layered_width(self) -> int: """ int: Maximum cubemap layered surface width. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH) @property def maximum_surfacecubemap_layered_layers(self) -> int: """ int: Maximum layers in a cubemap layered surface. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS) @property def max_registers_per_block(self) -> int: """ int: Maximum number of 32-bit registers available to a thread block. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK) @property def clock_rate(self) -> int: """ int: The typical clock frequency in kilohertz. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE) @property def texture_alignment(self) -> int: """ - int: Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset applied - to texture fetches. + int: Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset + applied to texture fetches. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT) @property def texture_pitch_alignment(self) -> int: """ int: Pitch alignment requirement for 2D texture references bound to pitched memory. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT) @property def gpu_overlap(self) -> bool: """ - bool: True if the device can concurrently copy memory between host and device while executing a kernel, False - if not. + bool: True if the device can concurrently copy memory between host and device while executing a kernel, + False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, self._handle) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP)) @property def multiprocessor_count(self) -> int: """ int: Number of multiprocessors on the device. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT) @property def kernel_exec_timeout(self) -> bool: """ bool: True if there is a run time limit for kernels executed on the device, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT)) @property def integrated(self) -> bool: """ bool: True if the device is integrated with the memory subsystem, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED, self._handle) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED)) @property def can_map_host_memory(self) -> bool: """ - True if the device can map host memory into the CUDA address space, False if not. + bool: True if the device can map host memory into the CUDA address space, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY)) @property def compute_mode(self) -> int: """ - Compute mode that device is currently in. + int: Compute mode that device is currently in. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE) @property def concurrent_kernels(self) -> bool: """ - True if the device supports executing multiple kernels within the same context simultaneously, False if not. + bool: True if the device supports executing multiple kernels within the same context simultaneously, + False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS)) @property def ecc_enabled(self) -> bool: """ - True if error correction is enabled on the device, False if error correction is disabled or not supported by - the device. + bool: True if error correction is enabled on the device, False if error correction is disabled or not + supported by the device. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED, self._handle) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED)) @property def pci_bus_id(self) -> int: """ - PCI bus identifier of the device. + int: PCI bus identifier of the device. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID) @property def pci_device_id(self) -> int: """ - PCI device (also known as slot) identifier of the device. + int: PCI device (also known as slot) identifier of the device. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID) @property def pci_domain_id(self) -> int: """ - PCI domain identifier of the device. + int: PCI domain identifier of the device. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID) @property def tcc_driver(self) -> bool: """ - True if the device is using a TCC driver, False if not. + bool: True if the device is using a TCC driver, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER, self._handle) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER)) @property def memory_clock_rate(self) -> int: """ - Peak memory clock frequency in kilohertz. + int: Peak memory clock frequency in kilohertz. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE) @property def global_memory_bus_width(self) -> int: """ - Global memory bus width in bits. + int: Global memory bus width in bits. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH) @property def l2_cache_size(self) -> int: """ - Size of L2 cache in bytes, 0 if the device doesn't have L2 cache. + int: Size of L2 cache in bytes, 0 if the device doesn't have L2 cache. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE) @property def max_threads_per_multiprocessor(self) -> int: """ - Maximum resident threads per multiprocessor. + int: Maximum resident threads per multiprocessor. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR) @property def unified_addressing(self) -> bool: """ - True if the device shares a unified address space with the host, False if not. + bool: True if the device shares a unified address space with the host, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING)) @property def compute_capability_major(self) -> int: """ - Major compute capability version number. + int: Major compute capability version number. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) @property def compute_capability_minor(self) -> int: """ - Minor compute capability version number. + int: Minor compute capability version number. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR) @property def global_l1_cache_supported(self) -> bool: @@ -809,13 +549,7 @@ def global_l1_cache_supported(self) -> bool: True if device supports caching globals in L1 cache, False if caching globals in L1 cache is not supported by the device. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED)) @property def local_l1_cache_supported(self) -> bool: @@ -823,35 +557,21 @@ def local_l1_cache_supported(self) -> bool: True if device supports caching locals in L1 cache, False if caching locals in L1 cache is not supported by the device. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED)) @property def max_shared_memory_per_multiprocessor(self) -> int: """ Maximum amount of shared memory available to a multiprocessor in bytes. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR) @property def max_registers_per_multiprocessor(self) -> int: """ Maximum number of 32-bit registers available to a multiprocessor. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR) @property def managed_memory(self) -> bool: @@ -859,46 +579,28 @@ def managed_memory(self) -> bool: True if device supports allocating managed memory on this system, False if allocating managed memory is not supported by the device on this system. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, self._handle) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY)) @property def multi_gpu_board(self) -> bool: """ True if device is on a multi-GPU board, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, self._handle) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD)) @property def multi_gpu_board_group_id(self) -> int: """ Unique identifier for a group of devices associated with the same board. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID) @property def host_native_atomic_supported(self) -> bool: """ True if Link between the device and the host supports native atomic operations, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED)) @property def single_to_double_precision_perf_ratio(self) -> int: @@ -906,11 +608,7 @@ def single_to_double_precision_perf_ratio(self) -> int: Ratio of single precision performance (in floating-point operations per second) to double precision performance. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO) @property def pageable_memory_access(self) -> bool: @@ -918,39 +616,21 @@ def pageable_memory_access(self) -> bool: True if device supports coherently accessing pageable memory without calling cudaHostRegister on it, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS)) @property def concurrent_managed_access(self) -> bool: """ True if device can coherently access managed memory concurrently with the CPU, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) @property def compute_preemption_supported(self) -> bool: """ True if device supports Compute Preemption, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED)) @property def can_use_host_pointer_for_registered_mem(self) -> bool: @@ -958,11 +638,7 @@ def can_use_host_pointer_for_registered_mem(self) -> bool: True if device can access host registered memory at the same virtual address as the CPU, False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, self._handle - ) - ) + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM) ) @property @@ -970,11 +646,7 @@ def max_shared_memory_per_block_optin(self) -> int: """ The maximum per block shared memory size supported on this device. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN) @property def pageable_memory_access_uses_host_page_tables(self) -> bool: @@ -982,11 +654,8 @@ def pageable_memory_access_uses_host_page_tables(self) -> bool: True if device accesses pageable memory via the host's page tables, False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, - self._handle, - ) + self._get_attribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES ) ) @@ -996,11 +665,7 @@ def direct_managed_mem_access_from_host(self) -> bool: True if the host can directly access managed memory on the device without migration, False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST, self._handle - ) - ) + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST) ) @property @@ -1010,11 +675,7 @@ def virtual_memory_management_supported(self) -> bool: and related APIs, False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, self._handle - ) - ) + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED) ) @property @@ -1024,11 +685,8 @@ def handle_type_posix_file_descriptor_supported(self) -> bool: False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED, - self._handle, - ) + self._get_attribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED ) ) @@ -1039,11 +697,7 @@ def handle_type_win32_handle_supported(self) -> bool: False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED, self._handle - ) - ) + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED) ) @property @@ -1053,11 +707,7 @@ def handle_type_win32_kmt_handle_supported(self) -> bool: False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED, self._handle - ) - ) + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED) ) @property @@ -1065,46 +715,28 @@ def max_blocks_per_multiprocessor(self) -> int: """ Maximum number of thread blocks that can reside on a multiprocessor. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR) @property def generic_compression_supported(self) -> bool: """ True if device supports compressible memory allocation via cuMemCreate, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED)) @property def max_persisting_l2_cache_size(self) -> int: """ Maximum L2 persisting lines capacity setting in bytes. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE) @property def max_access_policy_window_size(self) -> int: """ Maximum value of CUaccessPolicyWindow::num_bytes. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE) @property def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool: @@ -1112,11 +744,7 @@ def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool: True if device supports specifying the GPUDirect RDMA flag with cuMemCreate, False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED, self._handle - ) - ) + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED) ) @property @@ -1124,24 +752,14 @@ def reserved_shared_memory_per_block(self) -> int: """ Amount of shared memory per block reserved by CUDA driver in bytes. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK) @property def sparse_cuda_array_supported(self) -> bool: """ True if device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED)) @property def read_only_host_register_supported(self) -> bool: @@ -1150,11 +768,7 @@ def read_only_host_register_supported(self) -> bool: memory that must be mapped as read-only to the GPU, False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED, self._handle - ) - ) + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED) ) @property @@ -1162,26 +776,14 @@ def memory_pools_supported(self) -> bool: """ True if device supports using the cuMemAllocAsync and cuMemPool family of APIs, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED)) @property def gpu_direct_rdma_supported(self) -> bool: """ True if device supports GPUDirect RDMA APIs, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED)) @property def gpu_direct_rdma_flush_writes_options(self) -> int: @@ -1189,11 +791,7 @@ def gpu_direct_rdma_flush_writes_options(self) -> int: The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the CUflushGPUDirectRDMAWritesOptions enum. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS) @property def gpu_direct_rdma_writes_ordering(self) -> int: @@ -1201,22 +799,14 @@ def gpu_direct_rdma_writes_ordering(self) -> int: GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING) @property def mempool_supported_handle_types(self) -> int: """ Bitmask of handle types supported with mempool based IPC. """ - return handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES, self._handle - ) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES) @property def deferred_mapping_cuda_array_supported(self) -> bool: @@ -1224,11 +814,7 @@ def deferred_mapping_cuda_array_supported(self) -> bool: True if device supports deferred mapping CUDA arrays and CUDA mipmapped arrays, False if not. """ return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED, self._handle - ) - ) + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED) ) @property @@ -1236,31 +822,21 @@ def numa_config(self) -> int: """ NUMA configuration of a device. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG) @property def numa_id(self) -> int: """ NUMA node ID of the GPU memory. """ - return handle_return( - driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID, self._handle) - ) + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID) @property def multicast_supported(self) -> bool: """ True if device supports switch multicast and reduction operations, False if not. """ - return bool( - handle_return( - driver.cuDeviceGetAttribute( - driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED, self._handle - ) - ) - ) + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED)) class Device: diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 30287a38..691fa47c 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -198,3 +198,11 @@ def test_compute_capability(): def test_device_property_types(property_name, expected_type): device = Device() assert isinstance(getattr(device.properties, property_name), expected_type) + + +def test_device_properties_complete(): + device = Device() + live_props = set(attr for attr in dir(device.properties) if not attr.startswith("_")) + tab_props = set(attr for attr, _ in cuda_base_properties) + assert len(tab_props) == len(cuda_base_properties) # Ensure no duplicates. + assert tab_props == live_props # Ensure exact match.