From fdc67f7fdadc19899f1c3068bc1e2ae1985d59d1 Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Fri, 17 Jan 2025 11:44:42 -0800
Subject: [PATCH 01/10] device porperties first review

---
 cuda_core/cuda/core/experimental/_device.py | 304 +++++++++++++++++++-
 cuda_core/tests/test_device.py              |  97 +++++++
 2 files changed, 397 insertions(+), 4 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index 747174b2..d5875bba 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -14,6 +14,299 @@
 _tls_lock = threading.Lock()
 
 
+class DeviceProperties:
+    """
+    Represents the properties of a CUDA device.
+
+    Attributes
+    ----------
+    name : str
+        ASCII string identifying the device.
+    uuid : cudaUUID_t
+        16-byte unique identifier.
+    total_global_mem : int
+        Total amount of global memory available on the device in bytes.
+    shared_mem_per_block : int
+        Maximum amount of shared memory available to a thread block in bytes.
+    regs_per_block : int
+        Maximum number of 32-bit registers available to a thread block.
+    warp_size : int
+        Warp size in threads.
+    mem_pitch : int
+        Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through
+        cudaMallocPitch().
+    max_threads_per_block : int
+        Maximum number of threads per block.
+    max_threads_dim : tuple
+        Maximum size of each dimension of a block.
+    max_grid_size : tuple
+        Maximum size of each dimension of a grid.
+    clock_rate : int
+        Clock frequency in kilohertz.
+    total_const_mem : int
+        Total amount of constant memory available on the device in bytes.
+    major : int
+        Major revision number defining the device's compute capability.
+    minor : int
+        Minor revision number defining the device's compute capability.
+    texture_alignment : int
+        Alignment requirement; texture base addresses that are aligned to textureAlignment bytes do not need an
+        offset applied to texture fetches.
+    texture_pitch_alignment : int
+        Pitch alignment requirement for 2D texture references that are bound to pitched memory.
+    device_overlap : int
+        1 if the device can concurrently copy memory between host and device while executing a kernel, or 0 if not.
+    multi_processor_count : int
+        Number of multiprocessors on the device.
+    kernel_exec_timeout_enabled : int
+        1 if there is a run time limit for kernels executed on the device, or 0 if not.
+    integrated : int
+        1 if the device is an integrated (motherboard) GPU and 0 if it is a discrete (card) component.
+    can_map_host_memory : int
+        1 if the device can map host memory into the CUDA address space for use with
+        cudaHostAlloc()/cudaHostGetDevicePointer(), or 0 if not.
+    compute_mode : int
+        Compute mode that the device is currently in.
+    max_texture_1d : int
+        Maximum 1D texture size.
+    max_texture_1d_mipmap : int
+        Maximum 1D mipmapped texture size.
+    max_texture_1d_linear : int
+        Maximum 1D texture size for textures bound to linear memory.
+    max_texture_2d : tuple
+        Maximum 2D texture dimensions.
+    max_texture_2d_mipmap : tuple
+        Maximum 2D mipmapped texture dimensions.
+    max_texture_2d_linear : tuple
+        Maximum 2D texture dimensions for 2D textures bound to pitch linear memory.
+    max_texture_2d_gather : tuple
+        Maximum 2D texture dimensions if texture gather operations have to be performed.
+    max_texture_3d : tuple
+        Maximum 3D texture dimensions.
+    max_texture_3d_alt : tuple
+        Maximum alternate 3D texture dimensions.
+    max_texture_cubemap : int
+        Maximum cubemap texture width or height.
+    max_texture_1d_layered : tuple
+        Maximum 1D layered texture dimensions.
+    max_texture_2d_layered : tuple
+        Maximum 2D layered texture dimensions.
+    max_texture_cubemap_layered : tuple
+        Maximum cubemap layered texture dimensions.
+    max_surface_1d : int
+        Maximum 1D surface size.
+    max_surface_2d : tuple
+        Maximum 2D surface dimensions.
+    max_surface_3d : tuple
+        Maximum 3D surface dimensions.
+    max_surface_1d_layered : tuple
+        Maximum 1D layered surface dimensions.
+    max_surface_2d_layered : tuple
+        Maximum 2D layered surface dimensions.
+    max_surface_cubemap : int
+        Maximum cubemap surface width or height.
+    max_surface_cubemap_layered : tuple
+        Maximum cubemap layered surface dimensions.
+    surface_alignment : int
+        Alignment requirements for surfaces.
+    concurrent_kernels : int
+        1 if the device supports executing multiple kernels within the same context simultaneously, or 0 if
+        not.
+    ecc_enabled : int
+        1 if the device has ECC support turned on, or 0 if not.
+    pci_bus_id : int
+        PCI bus identifier of the device.
+    pci_device_id : int
+        PCI device (sometimes called slot) identifier of the device.
+    pci_domain_id : int
+        PCI domain identifier of the device.
+    tcc_driver : int
+        1 if the device is using a TCC driver or 0 if not.
+    async_engine_count : int
+        1 when the device can concurrently copy memory between host and device while executing a kernel.
+        It is 2 when the device can concurrently copy memory between host and device in both directions
+        and execute a kernel at the same time. It is 0 if neither of these is supported.
+    unified_addressing : int
+        1 if the device shares a unified address space with the host and 0 otherwise.
+    memory_clock_rate : int
+        Peak memory clock frequency in kilohertz.
+    memory_bus_width : int
+        Memory bus width in bits.
+    l2_cache_size : int
+        L2 cache size in bytes.
+    persisting_l2_cache_max_size : int
+        L2 cache's maximum persisting lines size in bytes.
+    max_threads_per_multi_processor : int
+        Number of maximum resident threads per multiprocessor.
+    stream_priorities_supported : int
+        1 if the device supports stream priorities, or 0 if it is not supported.
+    global_l1_cache_supported : int
+        1 if the device supports caching of globals in L1 cache, or 0 if it is not supported.
+    local_l1_cache_supported : int
+        1 if the device supports caching of locals in L1 cache, or 0 if it is not supported.
+    shared_mem_per_multiprocessor : int
+        Maximum amount of shared memory available to a multiprocessor in bytes; this amount is shared by all
+        thread blocks simultaneously resident on a multiprocessor.
+    regs_per_multiprocessor : int
+        Maximum number of 32-bit registers available to a multiprocessor; this number is shared by all thread
+        blocks simultaneously resident on a multiprocessor.
+    managed_memory : int
+        1 if the device supports allocating managed memory on this system, or 0 if it is not supported.
+    is_multi_gpu_board : int
+        1 if the device is on a multi-GPU board (e.g. Gemini cards), and 0 if not.
+    multi_gpu_board_group_id : int
+        Unique identifier for a group of devices associated with the same board. Devices on the same
+        multi-GPU board will share the same identifier.
+    single_to_double_precision_perf_ratio : int
+        Ratio of single precision performance (in floating-point operations per second) to double precision
+        performance.
+    pageable_memory_access : int
+        1 if the device supports coherently accessing pageable memory without calling cudaHostRegister on it,
+        and 0 otherwise.
+    concurrent_managed_access : int
+        1 if the device can coherently access managed memory concurrently with the CPU, and 0 otherwise.
+    compute_preemption_supported : int
+        1 if the device supports Compute Preemption, and 0 otherwise.
+    can_use_host_pointer_for_registered_mem : int
+        1 if the device can access host registered memory at the same virtual address as the CPU, and 0 otherwise.
+    cooperative_launch : int
+        1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernel, and 0 otherwise.
+    cooperative_multi_device_launch : int
+        1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0
+        otherwise.
+    pageable_memory_access_uses_host_page_tables : int
+        1 if the device accesses pageable memory via the host's page tables, and 0 otherwise.
+    direct_managed_mem_access_from_host : int
+        1 if the host can directly access managed memory on the device without migration, and 0 otherwise.
+    access_policy_max_window_size : int
+        Maximum value of cudaAccessPolicyWindow::num_bytes.
+    reserved_shared_mem_per_block : int
+        Shared memory reserved by CUDA driver per block in bytes.
+    host_register_supported : int
+        1 if the device supports host memory registration via cudaHostRegister, and 0 otherwise.
+    sparse_cuda_array_supported : int
+        1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise.
+    host_register_read_only_supported : int
+        1 if the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly to register memory
+        that must be mapped as read-only to the GPU.
+    timeline_semaphore_interop_supported : int
+        1 if external timeline semaphore interop is supported on the device, 0 otherwise.
+    memory_pools_supported : int
+        1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise.
+    gpu_direct_rdma_supported : int
+        1 if the device supports GPUDirect RDMA APIs, 0 otherwise.
+    gpu_direct_rdma_flush_writes_options : int
+        Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum.
+    gpu_direct_rdma_writes_ordering : int
+        See the cudaGPUDirectRDMAWritesOrdering enum for numerical values.
+    memory_pool_supported_handle_types : int
+        Bitmask of handle types supported with mempool-based IPC.
+    deferred_mapping_cuda_array_supported : int
+        1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays.
+    ipc_event_supported : int
+        1 if the device supports IPC Events, and 0 otherwise.
+    unified_function_pointers : int
+        1 if the device supports unified pointers, and 0 otherwise.
+    """
+
+    def _init(device_id):
+        self = DeviceProperties.__new__(DeviceProperties)
+
+        prop = handle_return(runtime.cudaGetDeviceProperties(device_id))
+
+        self.name = prop.name.decode("utf-8")
+        self.uuid = prop.uuid.bytes
+        self.total_global_mem = prop.totalGlobalMem
+        self.shared_mem_per_block = prop.sharedMemPerBlock
+        self.regs_per_block = prop.regsPerBlock
+        self.warp_size = prop.warpSize
+        self.mem_pitch = prop.memPitch
+        self.max_threads_per_block = prop.maxThreadsPerBlock
+        self.max_threads_dim = tuple(prop.maxThreadsDim)
+        self.max_grid_size = tuple(prop.maxGridSize)
+        self.clock_rate = prop.clockRate
+        self.total_const_mem = prop.totalConstMem
+        self.major = prop.major
+        self.minor = prop.minor
+        self.texture_alignment = prop.textureAlignment
+        self.texture_pitch_alignment = prop.texturePitchAlignment
+        self.device_overlap = prop.deviceOverlap
+        self.multi_processor_count = prop.multiProcessorCount
+        self.kernel_exec_timeout_enabled = prop.kernelExecTimeoutEnabled
+        self.integrated = prop.integrated
+        self.can_map_host_memory = prop.canMapHostMemory
+        self.compute_mode = prop.computeMode
+        self.max_texture_1d = prop.maxTexture1D
+        self.max_texture_1d_mipmap = prop.maxTexture1DMipmap
+        self.max_texture_1d_linear = prop.maxTexture1DLinear
+        self.max_texture_2d = tuple(prop.maxTexture2D)
+        self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap)
+        self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear)
+        self.max_texture_2d_gather = tuple(prop.maxTexture2DGather)
+        self.max_texture_3d = tuple(prop.maxTexture3D)
+        self.max_texture_3d_alt = tuple(prop.maxTexture3DAlt)
+        self.max_texture_cubemap = prop.maxTextureCubemap
+        self.max_texture_1d_layered = tuple(prop.maxTexture1DLayered)
+        self.max_texture_2d_layered = tuple(prop.maxTexture2DLayered)
+        self.max_texture_cubemap_layered = tuple(prop.maxTextureCubemapLayered)
+        self.max_surface_1d = prop.maxSurface1D
+        self.max_surface_2d = tuple(prop.maxSurface2D)
+        self.max_surface_3d = tuple(prop.maxSurface3D)
+        self.max_surface_1d_layered = tuple(prop.maxSurface1DLayered)
+        self.max_surface_2d_layered = tuple(prop.maxSurface2DLayered)
+        self.max_surface_cubemap = prop.maxSurfaceCubemap
+        self.max_surface_cubemap_layered = tuple(prop.maxSurfaceCubemapLayered)
+        self.surface_alignment = prop.surfaceAlignment
+        self.concurrent_kernels = prop.concurrentKernels
+        self.ecc_enabled = prop.ECCEnabled
+        self.pci_bus_id = prop.pciBusID
+        self.pci_device_id = prop.pciDeviceID
+        self.pci_domain_id = prop.pciDomainID
+        self.tcc_driver = prop.tccDriver
+        self.async_engine_count = prop.asyncEngineCount
+        self.unified_addressing = prop.unifiedAddressing
+        self.memory_clock_rate = prop.memoryClockRate
+        self.memory_bus_width = prop.memoryBusWidth
+        self.l2_cache_size = prop.l2CacheSize
+        self.persisting_l2_cache_max_size = prop.persistingL2CacheMaxSize
+        self.max_threads_per_multi_processor = prop.maxThreadsPerMultiProcessor
+        self.stream_priorities_supported = prop.streamPrioritiesSupported
+        self.global_l1_cache_supported = prop.globalL1CacheSupported
+        self.local_l1_cache_supported = prop.localL1CacheSupported
+        self.shared_mem_per_multiprocessor = prop.sharedMemPerMultiprocessor
+        self.regs_per_multiprocessor = prop.regsPerMultiprocessor
+        self.managed_memory = prop.managedMemory
+        self.is_multi_gpu_board = prop.isMultiGpuBoard
+        self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID
+        self.single_to_double_precision_perf_ratio = prop.singleToDoublePrecisionPerfRatio
+        self.pageable_memory_access = prop.pageableMemoryAccess
+        self.concurrent_managed_access = prop.concurrentManagedAccess
+        self.compute_preemption_supported = prop.computePreemptionSupported
+        self.can_use_host_pointer_for_registered_mem = prop.canUseHostPointerForRegisteredMem
+        self.cooperative_launch = prop.cooperativeLaunch
+        self.cooperative_multi_device_launch = prop.cooperativeMultiDeviceLaunch
+        self.pageable_memory_access_uses_host_page_tables = prop.pageableMemoryAccessUsesHostPageTables
+        self.direct_managed_mem_access_from_host = prop.directManagedMemAccessFromHost
+        self.access_policy_max_window_size = prop.accessPolicyMaxWindowSize
+        self.reserved_shared_mem_per_block = prop.reservedSharedMemPerBlock
+        self.host_register_supported = prop.hostRegisterSupported
+        self.sparse_cuda_array_supported = prop.sparseCudaArraySupported
+        self.host_register_read_only_supported = prop.hostRegisterReadOnlySupported
+        self.timeline_semaphore_interop_supported = prop.timelineSemaphoreInteropSupported
+        self.memory_pools_supported = prop.memoryPoolsSupported
+        self.gpu_direct_rdma_supported = prop.gpuDirectRDMASupported
+        self.gpu_direct_rdma_flush_writes_options = prop.gpuDirectRDMAFlushWritesOptions
+        self.gpu_direct_rdma_writes_ordering = prop.gpuDirectRDMAWritesOrdering
+        self.memory_pool_supported_handle_types = prop.memoryPoolSupportedHandleTypes
+        self.deferred_mapping_cuda_array_supported = prop.deferredMappingCudaArraySupported
+        self.ipc_event_supported = prop.ipcEventSupported
+        self.unified_function_pointers = prop.unifiedFunctionPointers
+        return self
+
+    def __init__(self, device_id):
+        raise RuntimeError("DeviceProperties should not be instantiated directly")
+
+
 class Device:
     """Represent a GPU and act as an entry point for cuda.core features.
 
@@ -41,7 +334,7 @@ class Device:
 
     """
 
-    __slots__ = ("_id", "_mr", "_has_inited")
+    __slots__ = ("_id", "_mr", "_has_inited", "_properties")
 
     def __new__(cls, device_id=None):
         # important: creating a Device instance does not initialize the GPU!
@@ -73,6 +366,7 @@ def __new__(cls, device_id=None):
                         dev._mr = _SynchronousMemoryResource(dev_id)
 
                     dev._has_inited = False
+                    dev._properties = None
                     _tls.devices.append(dev)
 
         return _tls.devices[device_id]
@@ -124,10 +418,12 @@ def name(self) -> str:
         return name.decode()
 
     @property
-    def properties(self) -> dict:
+    def properties(self) -> DeviceProperties:
         """Return information about the compute-device."""
-        # TODO: pythonize the key names
-        return handle_return(runtime.cudaGetDeviceProperties(self._id))
+        if self._properties is None:
+            self._properties = DeviceProperties._init(self._id)
+
+        return self._properties
 
     @property
     def compute_capability(self) -> ComputeCapability:
diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py
index 876299f3..571b7dd8 100644
--- a/cuda_core/tests/test_device.py
+++ b/cuda_core/tests/test_device.py
@@ -78,3 +78,100 @@ def test_compute_capability():
     )
     expected_cc = ComputeCapability(major, minor)
     assert device.compute_capability == expected_cc
+
+
+def test_device_property_values():
+    device = Device()
+    assert device.properties.name == device.name
+    assert device.properties.uuid.hex() == device.uuid.replace("-", "")
+
+
+def test_device_property_types():
+    device = Device()
+    assert isinstance(device.properties.name, str)
+    assert isinstance(device.properties.uuid, bytes)
+    assert isinstance(device.properties.total_global_mem, int)
+    assert isinstance(device.properties.shared_mem_per_block, int)
+    assert isinstance(device.properties.regs_per_block, int)
+    assert isinstance(device.properties.warp_size, int)
+    assert isinstance(device.properties.mem_pitch, int)
+    assert isinstance(device.properties.max_threads_per_block, int)
+    assert isinstance(device.properties.max_threads_dim, tuple)
+    assert isinstance(device.properties.max_grid_size, tuple)
+    assert isinstance(device.properties.clock_rate, int)
+    assert isinstance(device.properties.total_const_mem, int)
+    assert isinstance(device.properties.major, int)
+    assert isinstance(device.properties.minor, int)
+    assert isinstance(device.properties.texture_alignment, int)
+    assert isinstance(device.properties.texture_pitch_alignment, int)
+    assert isinstance(device.properties.device_overlap, int)
+    assert isinstance(device.properties.multi_processor_count, int)
+    assert isinstance(device.properties.kernel_exec_timeout_enabled, int)
+    assert isinstance(device.properties.integrated, int)
+    assert isinstance(device.properties.can_map_host_memory, int)
+    assert isinstance(device.properties.compute_mode, int)
+    assert isinstance(device.properties.max_texture_1d, int)
+    assert isinstance(device.properties.max_texture_1d_mipmap, int)
+    assert isinstance(device.properties.max_texture_1d_linear, int)
+    assert isinstance(device.properties.max_texture_2d, tuple)
+    assert isinstance(device.properties.max_texture_2d_mipmap, tuple)
+    assert isinstance(device.properties.max_texture_2d_linear, tuple)
+    assert isinstance(device.properties.max_texture_2d_gather, tuple)
+    assert isinstance(device.properties.max_texture_3d, tuple)
+    assert isinstance(device.properties.max_texture_3d_alt, tuple)
+    assert isinstance(device.properties.max_texture_cubemap, int)
+    assert isinstance(device.properties.max_texture_1d_layered, tuple)
+    assert isinstance(device.properties.max_texture_2d_layered, tuple)
+    assert isinstance(device.properties.max_texture_cubemap_layered, tuple)
+    assert isinstance(device.properties.max_surface_1d, int)
+    assert isinstance(device.properties.max_surface_2d, tuple)
+    assert isinstance(device.properties.max_surface_3d, tuple)
+    assert isinstance(device.properties.max_surface_1d_layered, tuple)
+    assert isinstance(device.properties.max_surface_2d_layered, tuple)
+    assert isinstance(device.properties.max_surface_cubemap, int)
+    assert isinstance(device.properties.max_surface_cubemap_layered, tuple)
+    assert isinstance(device.properties.surface_alignment, int)
+    assert isinstance(device.properties.concurrent_kernels, int)
+    assert isinstance(device.properties.ecc_enabled, int)
+    assert isinstance(device.properties.pci_bus_id, int)
+    assert isinstance(device.properties.pci_device_id, int)
+    assert isinstance(device.properties.pci_domain_id, int)
+    assert isinstance(device.properties.tcc_driver, int)
+    assert isinstance(device.properties.async_engine_count, int)
+    assert isinstance(device.properties.unified_addressing, int)
+    assert isinstance(device.properties.memory_clock_rate, int)
+    assert isinstance(device.properties.memory_bus_width, int)
+    assert isinstance(device.properties.l2_cache_size, int)
+    assert isinstance(device.properties.persisting_l2_cache_max_size, int)
+    assert isinstance(device.properties.max_threads_per_multi_processor, int)
+    assert isinstance(device.properties.stream_priorities_supported, int)
+    assert isinstance(device.properties.global_l1_cache_supported, int)
+    assert isinstance(device.properties.local_l1_cache_supported, int)
+    assert isinstance(device.properties.shared_mem_per_multiprocessor, int)
+    assert isinstance(device.properties.regs_per_multiprocessor, int)
+    assert isinstance(device.properties.managed_memory, int)
+    assert isinstance(device.properties.is_multi_gpu_board, int)
+    assert isinstance(device.properties.multi_gpu_board_group_id, int)
+    assert isinstance(device.properties.single_to_double_precision_perf_ratio, int)
+    assert isinstance(device.properties.pageable_memory_access, int)
+    assert isinstance(device.properties.concurrent_managed_access, int)
+    assert isinstance(device.properties.compute_preemption_supported, int)
+    assert isinstance(device.properties.can_use_host_pointer_for_registered_mem, int)
+    assert isinstance(device.properties.cooperative_launch, int)
+    assert isinstance(device.properties.cooperative_multi_device_launch, int)
+    assert isinstance(device.properties.pageable_memory_access_uses_host_page_tables, int)
+    assert isinstance(device.properties.direct_managed_mem_access_from_host, int)
+    assert isinstance(device.properties.access_policy_max_window_size, int)
+    assert isinstance(device.properties.reserved_shared_mem_per_block, int)
+    assert isinstance(device.properties.host_register_supported, int)
+    assert isinstance(device.properties.sparse_cuda_array_supported, int)
+    assert isinstance(device.properties.host_register_read_only_supported, int)
+    assert isinstance(device.properties.timeline_semaphore_interop_supported, int)
+    assert isinstance(device.properties.memory_pools_supported, int)
+    assert isinstance(device.properties.gpu_direct_rdma_supported, int)
+    assert isinstance(device.properties.gpu_direct_rdma_flush_writes_options, int)
+    assert isinstance(device.properties.gpu_direct_rdma_writes_ordering, int)
+    assert isinstance(device.properties.memory_pool_supported_handle_types, int)
+    assert isinstance(device.properties.deferred_mapping_cuda_array_supported, int)
+    assert isinstance(device.properties.ipc_event_supported, int)
+    assert isinstance(device.properties.unified_function_pointers, int)

From c3276f6a3c0367c727c990e3965db5aa360a3f77 Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Fri, 17 Jan 2025 11:55:10 -0800
Subject: [PATCH 02/10] tweaks

---
 cuda_core/docs/source/api_private.rst         | 1 +
 cuda_core/docs/source/release/0.2.0-notes.rst | 9 ++++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst
index f100eb7c..45d32808 100644
--- a/cuda_core/docs/source/api_private.rst
+++ b/cuda_core/docs/source/api_private.rst
@@ -16,6 +16,7 @@ CUDA runtime
    _memory.Buffer
    _stream.Stream
    _event.Event
+   _device.DeviceProperties
 
 
 CUDA compilation toolchain
diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst
index 572567fd..e686b504 100644
--- a/cuda_core/docs/source/release/0.2.0-notes.rst
+++ b/cuda_core/docs/source/release/0.2.0-notes.rst
@@ -1,7 +1,14 @@
 ``cuda.core`` v0.2.0 Release notes
 ==================================
 
+Highlights
+----------
+
+- :class:`DeviceProperties` class added to provide pythonic access to device properties
+
+
 Breaking changes
 ----------------
 
-- Change ``__cuda_stream__`` from attribute to method
\ No newline at end of file
+- Change ``__cuda_stream__`` from attribute to method
+- Device.properties now provides an instance of :class:`DeviceProperties` instead of a dictionary. 
\ No newline at end of file

From 8ce2d0351fae8a91d433910f3019a3a87f89323a Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Fri, 17 Jan 2025 15:25:54 -0800
Subject: [PATCH 03/10] support the full list of properties

---
 cuda_core/cuda/core/experimental/_device.py |  18 ++
 cuda_core/tests/test_device.py              | 197 +++++++++++---------
 2 files changed, 127 insertions(+), 88 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index d5875bba..82e5473c 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -43,6 +43,8 @@ class DeviceProperties:
         Maximum size of each dimension of a grid.
     clock_rate : int
         Clock frequency in kilohertz.
+    cluster_launch : int
+        Indicates device supports cluster launch
     total_const_mem : int
         Total amount of constant memory available on the device in bytes.
     major : int
@@ -174,6 +176,8 @@ class DeviceProperties:
     cooperative_multi_device_launch : int
         1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0
         otherwise.
+    sharedMemPerBlockOptin : int
+        The per device maximum shared memory per block usable by special opt in
     pageable_memory_access_uses_host_page_tables : int
         1 if the device accesses pageable memory via the host's page tables, and 0 otherwise.
     direct_managed_mem_access_from_host : int
@@ -207,6 +211,14 @@ class DeviceProperties:
         1 if the device supports IPC Events, and 0 otherwise.
     unified_function_pointers : int
         1 if the device supports unified pointers, and 0 otherwise.
+    host_native_atomic_supported : int
+        1 if the link between the device and the host supports native atomic operations.
+    luid : bytes
+        8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms.
+    luid_device_node_mask : int
+        LUID device node mask. Value is undefined on TCC and non-Windows platforms.
+    max_blocks_per_multi_processor : int
+        Maximum number of resident blocks per multiprocessor.
     """
 
     def _init(device_id):
@@ -225,6 +237,7 @@ def _init(device_id):
         self.max_threads_dim = tuple(prop.maxThreadsDim)
         self.max_grid_size = tuple(prop.maxGridSize)
         self.clock_rate = prop.clockRate
+        self.cluster_launch = prop.clusterLaunch
         self.total_const_mem = prop.totalConstMem
         self.major = prop.major
         self.minor = prop.minor
@@ -285,6 +298,7 @@ def _init(device_id):
         self.can_use_host_pointer_for_registered_mem = prop.canUseHostPointerForRegisteredMem
         self.cooperative_launch = prop.cooperativeLaunch
         self.cooperative_multi_device_launch = prop.cooperativeMultiDeviceLaunch
+        self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin
         self.pageable_memory_access_uses_host_page_tables = prop.pageableMemoryAccessUsesHostPageTables
         self.direct_managed_mem_access_from_host = prop.directManagedMemAccessFromHost
         self.access_policy_max_window_size = prop.accessPolicyMaxWindowSize
@@ -301,6 +315,10 @@ def _init(device_id):
         self.deferred_mapping_cuda_array_supported = prop.deferredMappingCudaArraySupported
         self.ipc_event_supported = prop.ipcEventSupported
         self.unified_function_pointers = prop.unifiedFunctionPointers
+        self.host_native_atomic_supported = prop.hostNativeAtomicSupported
+        self.luid = prop.luid
+        self.luid_device_node_mask = prop.luidDeviceNodeMask
+        self.max_blocks_per_multi_processor = prop.maxBlocksPerMultiProcessor
         return self
 
     def __init__(self, device_id):
diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py
index 571b7dd8..2b0ffe39 100644
--- a/cuda_core/tests/test_device.py
+++ b/cuda_core/tests/test_device.py
@@ -11,6 +11,7 @@
 except ImportError:
     from cuda import cuda as driver
     from cuda import cudart as runtime
+import pytest
 
 from cuda.core.experimental import Device
 from cuda.core.experimental._utils import ComputeCapability, handle_return
@@ -86,92 +87,112 @@ def test_device_property_values():
     assert device.properties.uuid.hex() == device.uuid.replace("-", "")
 
 
-def test_device_property_types():
+cuda_base_properties = [
+    ("name", str),
+    ("uuid", bytes),
+    ("total_global_mem", int),
+    ("shared_mem_per_block", int),
+    ("regs_per_block", int),
+    ("warp_size", int),
+    ("mem_pitch", int),
+    ("max_threads_per_block", int),
+    ("max_threads_dim", tuple),
+    ("max_grid_size", tuple),
+    ("clock_rate", int),
+    ("total_const_mem", int),
+    ("major", int),
+    ("minor", int),
+    ("texture_alignment", int),
+    ("texture_pitch_alignment", int),
+    ("device_overlap", int),
+    ("multi_processor_count", int),
+    ("kernel_exec_timeout_enabled", int),
+    ("integrated", int),
+    ("can_map_host_memory", int),
+    ("compute_mode", int),
+    ("max_texture_1d", int),
+    ("max_texture_1d_mipmap", int),
+    ("max_texture_1d_linear", int),
+    ("max_texture_2d", tuple),
+    ("max_texture_2d_mipmap", tuple),
+    ("max_texture_2d_linear", tuple),
+    ("max_texture_2d_gather", tuple),
+    ("max_texture_3d", tuple),
+    ("max_texture_3d_alt", tuple),
+    ("max_texture_cubemap", int),
+    ("max_texture_1d_layered", tuple),
+    ("max_texture_2d_layered", tuple),
+    ("max_texture_cubemap_layered", tuple),
+    ("max_surface_1d", int),
+    ("max_surface_2d", tuple),
+    ("max_surface_3d", tuple),
+    ("max_surface_1d_layered", tuple),
+    ("max_surface_2d_layered", tuple),
+    ("max_surface_cubemap", int),
+    ("max_surface_cubemap_layered", tuple),
+    ("surface_alignment", int),
+    ("concurrent_kernels", int),
+    ("ecc_enabled", int),
+    ("pci_bus_id", int),
+    ("pci_device_id", int),
+    ("pci_domain_id", int),
+    ("tcc_driver", int),
+    ("async_engine_count", int),
+    ("unified_addressing", int),
+    ("memory_clock_rate", int),
+    ("memory_bus_width", int),
+    ("l2_cache_size", int),
+    ("persisting_l2_cache_max_size", int),
+    ("max_threads_per_multi_processor", int),
+    ("stream_priorities_supported", int),
+    ("global_l1_cache_supported", int),
+    ("local_l1_cache_supported", int),
+    ("shared_mem_per_multiprocessor", int),
+    ("regs_per_multiprocessor", int),
+    ("managed_memory", int),
+    ("is_multi_gpu_board", int),
+    ("multi_gpu_board_group_id", int),
+    ("single_to_double_precision_perf_ratio", int),
+    ("pageable_memory_access", int),
+    ("concurrent_managed_access", int),
+    ("compute_preemption_supported", int),
+    ("can_use_host_pointer_for_registered_mem", int),
+    ("cooperative_launch", int),
+    ("cooperative_multi_device_launch", int),
+    ("pageable_memory_access_uses_host_page_tables", int),
+    ("direct_managed_mem_access_from_host", int),
+    ("access_policy_max_window_size", int),
+    ("reserved_shared_mem_per_block", int),
+    ("host_register_supported", int),
+    ("sparse_cuda_array_supported", int),
+    ("host_register_read_only_supported", int),
+    ("timeline_semaphore_interop_supported", int),
+    ("memory_pools_supported", int),
+    ("gpu_direct_rdma_supported", int),
+    ("gpu_direct_rdma_flush_writes_options", int),
+    ("gpu_direct_rdma_writes_ordering", int),
+    ("memory_pool_supported_handle_types", int),
+    ("deferred_mapping_cuda_array_supported", int),
+    ("ipc_event_supported", int),
+    ("unified_function_pointers", int),
+]
+
+cuda_12_properties = [
+    ("host_native_atomic_supported", int),
+    ("luid", bytes),
+    ("luid_device_node_mask", int),
+    ("max_blocks_per_multi_processor", int),
+    ("shared_mem_per_block_optin", int),
+    ("cluster_launch", int),
+]
+
+
+driver_ver = handle_return(driver.cuDriverGetVersion())
+if driver_ver >= 12000:
+    cuda_base_properties += cuda_12_properties
+
+
+@pytest.mark.parametrize("property_name, expected_type", cuda_base_properties)
+def test_device_property_types(property_name, expected_type):
     device = Device()
-    assert isinstance(device.properties.name, str)
-    assert isinstance(device.properties.uuid, bytes)
-    assert isinstance(device.properties.total_global_mem, int)
-    assert isinstance(device.properties.shared_mem_per_block, int)
-    assert isinstance(device.properties.regs_per_block, int)
-    assert isinstance(device.properties.warp_size, int)
-    assert isinstance(device.properties.mem_pitch, int)
-    assert isinstance(device.properties.max_threads_per_block, int)
-    assert isinstance(device.properties.max_threads_dim, tuple)
-    assert isinstance(device.properties.max_grid_size, tuple)
-    assert isinstance(device.properties.clock_rate, int)
-    assert isinstance(device.properties.total_const_mem, int)
-    assert isinstance(device.properties.major, int)
-    assert isinstance(device.properties.minor, int)
-    assert isinstance(device.properties.texture_alignment, int)
-    assert isinstance(device.properties.texture_pitch_alignment, int)
-    assert isinstance(device.properties.device_overlap, int)
-    assert isinstance(device.properties.multi_processor_count, int)
-    assert isinstance(device.properties.kernel_exec_timeout_enabled, int)
-    assert isinstance(device.properties.integrated, int)
-    assert isinstance(device.properties.can_map_host_memory, int)
-    assert isinstance(device.properties.compute_mode, int)
-    assert isinstance(device.properties.max_texture_1d, int)
-    assert isinstance(device.properties.max_texture_1d_mipmap, int)
-    assert isinstance(device.properties.max_texture_1d_linear, int)
-    assert isinstance(device.properties.max_texture_2d, tuple)
-    assert isinstance(device.properties.max_texture_2d_mipmap, tuple)
-    assert isinstance(device.properties.max_texture_2d_linear, tuple)
-    assert isinstance(device.properties.max_texture_2d_gather, tuple)
-    assert isinstance(device.properties.max_texture_3d, tuple)
-    assert isinstance(device.properties.max_texture_3d_alt, tuple)
-    assert isinstance(device.properties.max_texture_cubemap, int)
-    assert isinstance(device.properties.max_texture_1d_layered, tuple)
-    assert isinstance(device.properties.max_texture_2d_layered, tuple)
-    assert isinstance(device.properties.max_texture_cubemap_layered, tuple)
-    assert isinstance(device.properties.max_surface_1d, int)
-    assert isinstance(device.properties.max_surface_2d, tuple)
-    assert isinstance(device.properties.max_surface_3d, tuple)
-    assert isinstance(device.properties.max_surface_1d_layered, tuple)
-    assert isinstance(device.properties.max_surface_2d_layered, tuple)
-    assert isinstance(device.properties.max_surface_cubemap, int)
-    assert isinstance(device.properties.max_surface_cubemap_layered, tuple)
-    assert isinstance(device.properties.surface_alignment, int)
-    assert isinstance(device.properties.concurrent_kernels, int)
-    assert isinstance(device.properties.ecc_enabled, int)
-    assert isinstance(device.properties.pci_bus_id, int)
-    assert isinstance(device.properties.pci_device_id, int)
-    assert isinstance(device.properties.pci_domain_id, int)
-    assert isinstance(device.properties.tcc_driver, int)
-    assert isinstance(device.properties.async_engine_count, int)
-    assert isinstance(device.properties.unified_addressing, int)
-    assert isinstance(device.properties.memory_clock_rate, int)
-    assert isinstance(device.properties.memory_bus_width, int)
-    assert isinstance(device.properties.l2_cache_size, int)
-    assert isinstance(device.properties.persisting_l2_cache_max_size, int)
-    assert isinstance(device.properties.max_threads_per_multi_processor, int)
-    assert isinstance(device.properties.stream_priorities_supported, int)
-    assert isinstance(device.properties.global_l1_cache_supported, int)
-    assert isinstance(device.properties.local_l1_cache_supported, int)
-    assert isinstance(device.properties.shared_mem_per_multiprocessor, int)
-    assert isinstance(device.properties.regs_per_multiprocessor, int)
-    assert isinstance(device.properties.managed_memory, int)
-    assert isinstance(device.properties.is_multi_gpu_board, int)
-    assert isinstance(device.properties.multi_gpu_board_group_id, int)
-    assert isinstance(device.properties.single_to_double_precision_perf_ratio, int)
-    assert isinstance(device.properties.pageable_memory_access, int)
-    assert isinstance(device.properties.concurrent_managed_access, int)
-    assert isinstance(device.properties.compute_preemption_supported, int)
-    assert isinstance(device.properties.can_use_host_pointer_for_registered_mem, int)
-    assert isinstance(device.properties.cooperative_launch, int)
-    assert isinstance(device.properties.cooperative_multi_device_launch, int)
-    assert isinstance(device.properties.pageable_memory_access_uses_host_page_tables, int)
-    assert isinstance(device.properties.direct_managed_mem_access_from_host, int)
-    assert isinstance(device.properties.access_policy_max_window_size, int)
-    assert isinstance(device.properties.reserved_shared_mem_per_block, int)
-    assert isinstance(device.properties.host_register_supported, int)
-    assert isinstance(device.properties.sparse_cuda_array_supported, int)
-    assert isinstance(device.properties.host_register_read_only_supported, int)
-    assert isinstance(device.properties.timeline_semaphore_interop_supported, int)
-    assert isinstance(device.properties.memory_pools_supported, int)
-    assert isinstance(device.properties.gpu_direct_rdma_supported, int)
-    assert isinstance(device.properties.gpu_direct_rdma_flush_writes_options, int)
-    assert isinstance(device.properties.gpu_direct_rdma_writes_ordering, int)
-    assert isinstance(device.properties.memory_pool_supported_handle_types, int)
-    assert isinstance(device.properties.deferred_mapping_cuda_array_supported, int)
-    assert isinstance(device.properties.ipc_event_supported, int)
-    assert isinstance(device.properties.unified_function_pointers, int)
+    assert isinstance(getattr(device.properties, property_name), expected_type)

From afb7ed8fd75f3a0f8de85eef963897f97c162a8f Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Fri, 17 Jan 2025 15:32:44 -0800
Subject: [PATCH 04/10] remove deprecated properties

---
 cuda_core/cuda/core/experimental/_device.py | 24 ---------------------
 cuda_core/tests/test_device.py              |  8 -------
 2 files changed, 32 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index 82e5473c..1433cab5 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -56,25 +56,17 @@ class DeviceProperties:
         offset applied to texture fetches.
     texture_pitch_alignment : int
         Pitch alignment requirement for 2D texture references that are bound to pitched memory.
-    device_overlap : int
-        1 if the device can concurrently copy memory between host and device while executing a kernel, or 0 if not.
     multi_processor_count : int
         Number of multiprocessors on the device.
-    kernel_exec_timeout_enabled : int
-        1 if there is a run time limit for kernels executed on the device, or 0 if not.
     integrated : int
         1 if the device is an integrated (motherboard) GPU and 0 if it is a discrete (card) component.
     can_map_host_memory : int
         1 if the device can map host memory into the CUDA address space for use with
         cudaHostAlloc()/cudaHostGetDevicePointer(), or 0 if not.
-    compute_mode : int
-        Compute mode that the device is currently in.
     max_texture_1d : int
         Maximum 1D texture size.
     max_texture_1d_mipmap : int
         Maximum 1D mipmapped texture size.
-    max_texture_1d_linear : int
-        Maximum 1D texture size for textures bound to linear memory.
     max_texture_2d : tuple
         Maximum 2D texture dimensions.
     max_texture_2d_mipmap : tuple
@@ -130,8 +122,6 @@ class DeviceProperties:
         and execute a kernel at the same time. It is 0 if neither of these is supported.
     unified_addressing : int
         1 if the device shares a unified address space with the host and 0 otherwise.
-    memory_clock_rate : int
-        Peak memory clock frequency in kilohertz.
     memory_bus_width : int
         Memory bus width in bits.
     l2_cache_size : int
@@ -159,9 +149,6 @@ class DeviceProperties:
     multi_gpu_board_group_id : int
         Unique identifier for a group of devices associated with the same board. Devices on the same
         multi-GPU board will share the same identifier.
-    single_to_double_precision_perf_ratio : int
-        Ratio of single precision performance (in floating-point operations per second) to double precision
-        performance.
     pageable_memory_access : int
         1 if the device supports coherently accessing pageable memory without calling cudaHostRegister on it,
         and 0 otherwise.
@@ -173,9 +160,6 @@ class DeviceProperties:
         1 if the device can access host registered memory at the same virtual address as the CPU, and 0 otherwise.
     cooperative_launch : int
         1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernel, and 0 otherwise.
-    cooperative_multi_device_launch : int
-        1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0
-        otherwise.
     sharedMemPerBlockOptin : int
         The per device maximum shared memory per block usable by special opt in
     pageable_memory_access_uses_host_page_tables : int
@@ -236,22 +220,17 @@ def _init(device_id):
         self.max_threads_per_block = prop.maxThreadsPerBlock
         self.max_threads_dim = tuple(prop.maxThreadsDim)
         self.max_grid_size = tuple(prop.maxGridSize)
-        self.clock_rate = prop.clockRate
         self.cluster_launch = prop.clusterLaunch
         self.total_const_mem = prop.totalConstMem
         self.major = prop.major
         self.minor = prop.minor
         self.texture_alignment = prop.textureAlignment
         self.texture_pitch_alignment = prop.texturePitchAlignment
-        self.device_overlap = prop.deviceOverlap
         self.multi_processor_count = prop.multiProcessorCount
-        self.kernel_exec_timeout_enabled = prop.kernelExecTimeoutEnabled
         self.integrated = prop.integrated
         self.can_map_host_memory = prop.canMapHostMemory
-        self.compute_mode = prop.computeMode
         self.max_texture_1d = prop.maxTexture1D
         self.max_texture_1d_mipmap = prop.maxTexture1DMipmap
-        self.max_texture_1d_linear = prop.maxTexture1DLinear
         self.max_texture_2d = tuple(prop.maxTexture2D)
         self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap)
         self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear)
@@ -278,7 +257,6 @@ def _init(device_id):
         self.tcc_driver = prop.tccDriver
         self.async_engine_count = prop.asyncEngineCount
         self.unified_addressing = prop.unifiedAddressing
-        self.memory_clock_rate = prop.memoryClockRate
         self.memory_bus_width = prop.memoryBusWidth
         self.l2_cache_size = prop.l2CacheSize
         self.persisting_l2_cache_max_size = prop.persistingL2CacheMaxSize
@@ -291,13 +269,11 @@ def _init(device_id):
         self.managed_memory = prop.managedMemory
         self.is_multi_gpu_board = prop.isMultiGpuBoard
         self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID
-        self.single_to_double_precision_perf_ratio = prop.singleToDoublePrecisionPerfRatio
         self.pageable_memory_access = prop.pageableMemoryAccess
         self.concurrent_managed_access = prop.concurrentManagedAccess
         self.compute_preemption_supported = prop.computePreemptionSupported
         self.can_use_host_pointer_for_registered_mem = prop.canUseHostPointerForRegisteredMem
         self.cooperative_launch = prop.cooperativeLaunch
-        self.cooperative_multi_device_launch = prop.cooperativeMultiDeviceLaunch
         self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin
         self.pageable_memory_access_uses_host_page_tables = prop.pageableMemoryAccessUsesHostPageTables
         self.direct_managed_mem_access_from_host = prop.directManagedMemAccessFromHost
diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py
index 2b0ffe39..4d7aa2cf 100644
--- a/cuda_core/tests/test_device.py
+++ b/cuda_core/tests/test_device.py
@@ -98,21 +98,16 @@ def test_device_property_values():
     ("max_threads_per_block", int),
     ("max_threads_dim", tuple),
     ("max_grid_size", tuple),
-    ("clock_rate", int),
     ("total_const_mem", int),
     ("major", int),
     ("minor", int),
     ("texture_alignment", int),
     ("texture_pitch_alignment", int),
-    ("device_overlap", int),
     ("multi_processor_count", int),
-    ("kernel_exec_timeout_enabled", int),
     ("integrated", int),
     ("can_map_host_memory", int),
-    ("compute_mode", int),
     ("max_texture_1d", int),
     ("max_texture_1d_mipmap", int),
-    ("max_texture_1d_linear", int),
     ("max_texture_2d", tuple),
     ("max_texture_2d_mipmap", tuple),
     ("max_texture_2d_linear", tuple),
@@ -139,7 +134,6 @@ def test_device_property_values():
     ("tcc_driver", int),
     ("async_engine_count", int),
     ("unified_addressing", int),
-    ("memory_clock_rate", int),
     ("memory_bus_width", int),
     ("l2_cache_size", int),
     ("persisting_l2_cache_max_size", int),
@@ -152,13 +146,11 @@ def test_device_property_values():
     ("managed_memory", int),
     ("is_multi_gpu_board", int),
     ("multi_gpu_board_group_id", int),
-    ("single_to_double_precision_perf_ratio", int),
     ("pageable_memory_access", int),
     ("concurrent_managed_access", int),
     ("compute_preemption_supported", int),
     ("can_use_host_pointer_for_registered_mem", int),
     ("cooperative_launch", int),
-    ("cooperative_multi_device_launch", int),
     ("pageable_memory_access_uses_host_page_tables", int),
     ("direct_managed_mem_access_from_host", int),
     ("access_policy_max_window_size", int),

From b2b2bafa35a3fea4ba6055472c20c5763b43b8ba Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Fri, 17 Jan 2025 15:40:34 -0800
Subject: [PATCH 05/10] convert to bools where appropriate

---
 cuda_core/cuda/core/experimental/_device.py | 148 ++++++++++++--------
 cuda_core/tests/test_device.py              |  58 ++++----
 2 files changed, 115 insertions(+), 91 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index 1433cab5..6b7e6ae4 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -28,6 +28,8 @@ class DeviceProperties:
         Total amount of global memory available on the device in bytes.
     shared_mem_per_block : int
         Maximum amount of shared memory available to a thread block in bytes.
+    shared_mem_per_block_optin : int
+        Maximum shared memory per block usable by special opt in
     regs_per_block : int
         Maximum number of 32-bit registers available to a thread block.
     warp_size : int
@@ -43,8 +45,8 @@ class DeviceProperties:
         Maximum size of each dimension of a grid.
     clock_rate : int
         Clock frequency in kilohertz.
-    cluster_launch : int
-        Indicates device supports cluster launch
+    cluster_launch : bool
+        Indicates device supports cluster launch.
     total_const_mem : int
         Total amount of constant memory available on the device in bytes.
     major : int
@@ -56,17 +58,25 @@ class DeviceProperties:
         offset applied to texture fetches.
     texture_pitch_alignment : int
         Pitch alignment requirement for 2D texture references that are bound to pitched memory.
+    device_overlap : bool
+        1 if the device can concurrently copy memory between host and device while executing a kernel, or 0 if not.
     multi_processor_count : int
         Number of multiprocessors on the device.
-    integrated : int
+    kernel_exec_timeout_enabled : bool
+        1 if there is a run time limit for kernels executed on the device, or 0 if not.
+    integrated : bool
         1 if the device is an integrated (motherboard) GPU and 0 if it is a discrete (card) component.
-    can_map_host_memory : int
+    can_map_host_memory : bool
         1 if the device can map host memory into the CUDA address space for use with
         cudaHostAlloc()/cudaHostGetDevicePointer(), or 0 if not.
+    compute_mode : int
+        Compute mode that the device is currently in.
     max_texture_1d : int
         Maximum 1D texture size.
     max_texture_1d_mipmap : int
         Maximum 1D mipmapped texture size.
+    max_texture_1d_linear : int
+        Maximum 1D texture size for textures bound to linear memory.
     max_texture_2d : tuple
         Maximum 2D texture dimensions.
     max_texture_2d_mipmap : tuple
@@ -103,10 +113,10 @@ class DeviceProperties:
         Maximum cubemap layered surface dimensions.
     surface_alignment : int
         Alignment requirements for surfaces.
-    concurrent_kernels : int
+    concurrent_kernels : bool
         1 if the device supports executing multiple kernels within the same context simultaneously, or 0 if
         not.
-    ecc_enabled : int
+    ecc_enabled : bool
         1 if the device has ECC support turned on, or 0 if not.
     pci_bus_id : int
         PCI bus identifier of the device.
@@ -114,14 +124,16 @@ class DeviceProperties:
         PCI device (sometimes called slot) identifier of the device.
     pci_domain_id : int
         PCI domain identifier of the device.
-    tcc_driver : int
+    tcc_driver : bool
         1 if the device is using a TCC driver or 0 if not.
     async_engine_count : int
         1 when the device can concurrently copy memory between host and device while executing a kernel.
         It is 2 when the device can concurrently copy memory between host and device in both directions
         and execute a kernel at the same time. It is 0 if neither of these is supported.
-    unified_addressing : int
+    unified_addressing : bool
         1 if the device shares a unified address space with the host and 0 otherwise.
+    memory_clock_rate : int
+        Peak memory clock frequency in kilohertz.
     memory_bus_width : int
         Memory bus width in bits.
     l2_cache_size : int
@@ -130,11 +142,11 @@ class DeviceProperties:
         L2 cache's maximum persisting lines size in bytes.
     max_threads_per_multi_processor : int
         Number of maximum resident threads per multiprocessor.
-    stream_priorities_supported : int
+    stream_priorities_supported : bool
         1 if the device supports stream priorities, or 0 if it is not supported.
-    global_l1_cache_supported : int
+    global_l1_cache_supported : bool
         1 if the device supports caching of globals in L1 cache, or 0 if it is not supported.
-    local_l1_cache_supported : int
+    local_l1_cache_supported : bool
         1 if the device supports caching of locals in L1 cache, or 0 if it is not supported.
     shared_mem_per_multiprocessor : int
         Maximum amount of shared memory available to a multiprocessor in bytes; this amount is shared by all
@@ -142,46 +154,50 @@ class DeviceProperties:
     regs_per_multiprocessor : int
         Maximum number of 32-bit registers available to a multiprocessor; this number is shared by all thread
         blocks simultaneously resident on a multiprocessor.
-    managed_memory : int
+    managed_memory : bool
         1 if the device supports allocating managed memory on this system, or 0 if it is not supported.
-    is_multi_gpu_board : int
+    is_multi_gpu_board : bool
         1 if the device is on a multi-GPU board (e.g. Gemini cards), and 0 if not.
     multi_gpu_board_group_id : int
         Unique identifier for a group of devices associated with the same board. Devices on the same
         multi-GPU board will share the same identifier.
-    pageable_memory_access : int
+    single_to_double_precision_perf_ratio : int
+        Ratio of single precision performance (in floating-point operations per second) to double precision
+        performance.
+    pageable_memory_access : bool
         1 if the device supports coherently accessing pageable memory without calling cudaHostRegister on it,
         and 0 otherwise.
-    concurrent_managed_access : int
+    concurrent_managed_access : bool
         1 if the device can coherently access managed memory concurrently with the CPU, and 0 otherwise.
-    compute_preemption_supported : int
+    compute_preemption_supported : bool
         1 if the device supports Compute Preemption, and 0 otherwise.
-    can_use_host_pointer_for_registered_mem : int
+    can_use_host_pointer_for_registered_mem : bool
         1 if the device can access host registered memory at the same virtual address as the CPU, and 0 otherwise.
-    cooperative_launch : int
+    cooperative_launch : bool
         1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernel, and 0 otherwise.
-    sharedMemPerBlockOptin : int
-        The per device maximum shared memory per block usable by special opt in
-    pageable_memory_access_uses_host_page_tables : int
+    cooperative_multi_device_launch : bool
+        1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0
+        otherwise.
+    pageable_memory_access_uses_host_page_tables : bool
         1 if the device accesses pageable memory via the host's page tables, and 0 otherwise.
-    direct_managed_mem_access_from_host : int
+    direct_managed_mem_access_from_host : bool
         1 if the host can directly access managed memory on the device without migration, and 0 otherwise.
     access_policy_max_window_size : int
         Maximum value of cudaAccessPolicyWindow::num_bytes.
     reserved_shared_mem_per_block : int
         Shared memory reserved by CUDA driver per block in bytes.
-    host_register_supported : int
+    host_register_supported : bool
         1 if the device supports host memory registration via cudaHostRegister, and 0 otherwise.
-    sparse_cuda_array_supported : int
+    sparse_cuda_array_supported : bool
         1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise.
-    host_register_read_only_supported : int
+    host_register_read_only_supported : bool
         1 if the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly to register memory
         that must be mapped as read-only to the GPU.
-    timeline_semaphore_interop_supported : int
+    timeline_semaphore_interop_supported : bool
         1 if external timeline semaphore interop is supported on the device, 0 otherwise.
-    memory_pools_supported : int
+    memory_pools_supported : bool
         1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise.
-    gpu_direct_rdma_supported : int
+    gpu_direct_rdma_supported : bool
         1 if the device supports GPUDirect RDMA APIs, 0 otherwise.
     gpu_direct_rdma_flush_writes_options : int
         Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum.
@@ -189,13 +205,13 @@ class DeviceProperties:
         See the cudaGPUDirectRDMAWritesOrdering enum for numerical values.
     memory_pool_supported_handle_types : int
         Bitmask of handle types supported with mempool-based IPC.
-    deferred_mapping_cuda_array_supported : int
+    deferred_mapping_cuda_array_supported : bool
         1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays.
-    ipc_event_supported : int
+    ipc_event_supported : bool
         1 if the device supports IPC Events, and 0 otherwise.
-    unified_function_pointers : int
+    unified_function_pointers : bool
         1 if the device supports unified pointers, and 0 otherwise.
-    host_native_atomic_supported : int
+    host_native_atomic_supported : bool
         1 if the link between the device and the host supports native atomic operations.
     luid : bytes
         8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms.
@@ -214,23 +230,29 @@ def _init(device_id):
         self.uuid = prop.uuid.bytes
         self.total_global_mem = prop.totalGlobalMem
         self.shared_mem_per_block = prop.sharedMemPerBlock
+        self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin
         self.regs_per_block = prop.regsPerBlock
         self.warp_size = prop.warpSize
         self.mem_pitch = prop.memPitch
         self.max_threads_per_block = prop.maxThreadsPerBlock
         self.max_threads_dim = tuple(prop.maxThreadsDim)
         self.max_grid_size = tuple(prop.maxGridSize)
-        self.cluster_launch = prop.clusterLaunch
+        self.clock_rate = prop.clockRate
+        self.cluster_launch = bool(prop.clusterLaunch)
         self.total_const_mem = prop.totalConstMem
         self.major = prop.major
         self.minor = prop.minor
         self.texture_alignment = prop.textureAlignment
         self.texture_pitch_alignment = prop.texturePitchAlignment
+        self.device_overlap = bool(prop.deviceOverlap)
         self.multi_processor_count = prop.multiProcessorCount
-        self.integrated = prop.integrated
-        self.can_map_host_memory = prop.canMapHostMemory
+        self.kernel_exec_timeout_enabled = bool(prop.kernelExecTimeoutEnabled)
+        self.integrated = bool(prop.integrated)
+        self.can_map_host_memory = bool(prop.canMapHostMemory)
+        self.compute_mode = prop.computeMode
         self.max_texture_1d = prop.maxTexture1D
         self.max_texture_1d_mipmap = prop.maxTexture1DMipmap
+        self.max_texture_1d_linear = prop.maxTexture1DLinear
         self.max_texture_2d = tuple(prop.maxTexture2D)
         self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap)
         self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear)
@@ -249,49 +271,51 @@ def _init(device_id):
         self.max_surface_cubemap = prop.maxSurfaceCubemap
         self.max_surface_cubemap_layered = tuple(prop.maxSurfaceCubemapLayered)
         self.surface_alignment = prop.surfaceAlignment
-        self.concurrent_kernels = prop.concurrentKernels
-        self.ecc_enabled = prop.ECCEnabled
+        self.concurrent_kernels = bool(prop.concurrentKernels)
+        self.ecc_enabled = bool(prop.ECCEnabled)
         self.pci_bus_id = prop.pciBusID
         self.pci_device_id = prop.pciDeviceID
         self.pci_domain_id = prop.pciDomainID
-        self.tcc_driver = prop.tccDriver
+        self.tcc_driver = bool(prop.tccDriver)
         self.async_engine_count = prop.asyncEngineCount
-        self.unified_addressing = prop.unifiedAddressing
+        self.unified_addressing = bool(prop.unifiedAddressing)
+        self.memory_clock_rate = prop.memoryClockRate
         self.memory_bus_width = prop.memoryBusWidth
         self.l2_cache_size = prop.l2CacheSize
         self.persisting_l2_cache_max_size = prop.persistingL2CacheMaxSize
         self.max_threads_per_multi_processor = prop.maxThreadsPerMultiProcessor
-        self.stream_priorities_supported = prop.streamPrioritiesSupported
-        self.global_l1_cache_supported = prop.globalL1CacheSupported
-        self.local_l1_cache_supported = prop.localL1CacheSupported
+        self.stream_priorities_supported = bool(prop.streamPrioritiesSupported)
+        self.global_l1_cache_supported = bool(prop.globalL1CacheSupported)
+        self.local_l1_cache_supported = bool(prop.localL1CacheSupported)
         self.shared_mem_per_multiprocessor = prop.sharedMemPerMultiprocessor
         self.regs_per_multiprocessor = prop.regsPerMultiprocessor
-        self.managed_memory = prop.managedMemory
-        self.is_multi_gpu_board = prop.isMultiGpuBoard
+        self.managed_memory = bool(prop.managedMemory)
+        self.is_multi_gpu_board = bool(prop.isMultiGpuBoard)
         self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID
-        self.pageable_memory_access = prop.pageableMemoryAccess
-        self.concurrent_managed_access = prop.concurrentManagedAccess
-        self.compute_preemption_supported = prop.computePreemptionSupported
-        self.can_use_host_pointer_for_registered_mem = prop.canUseHostPointerForRegisteredMem
-        self.cooperative_launch = prop.cooperativeLaunch
-        self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin
-        self.pageable_memory_access_uses_host_page_tables = prop.pageableMemoryAccessUsesHostPageTables
-        self.direct_managed_mem_access_from_host = prop.directManagedMemAccessFromHost
+        self.single_to_double_precision_perf_ratio = prop.singleToDoublePrecisionPerfRatio
+        self.pageable_memory_access = bool(prop.pageableMemoryAccess)
+        self.concurrent_managed_access = bool(prop.concurrentManagedAccess)
+        self.compute_preemption_supported = bool(prop.computePreemptionSupported)
+        self.can_use_host_pointer_for_registered_mem = bool(prop.canUseHostPointerForRegisteredMem)
+        self.cooperative_launch = bool(prop.cooperativeLaunch)
+        self.cooperative_multi_device_launch = bool(prop.cooperativeMultiDeviceLaunch)
+        self.pageable_memory_access_uses_host_page_tables = bool(prop.pageableMemoryAccessUsesHostPageTables)
+        self.direct_managed_mem_access_from_host = bool(prop.directManagedMemAccessFromHost)
         self.access_policy_max_window_size = prop.accessPolicyMaxWindowSize
         self.reserved_shared_mem_per_block = prop.reservedSharedMemPerBlock
-        self.host_register_supported = prop.hostRegisterSupported
-        self.sparse_cuda_array_supported = prop.sparseCudaArraySupported
-        self.host_register_read_only_supported = prop.hostRegisterReadOnlySupported
-        self.timeline_semaphore_interop_supported = prop.timelineSemaphoreInteropSupported
-        self.memory_pools_supported = prop.memoryPoolsSupported
-        self.gpu_direct_rdma_supported = prop.gpuDirectRDMASupported
+        self.host_register_supported = bool(prop.hostRegisterSupported)
+        self.sparse_cuda_array_supported = bool(prop.sparseCudaArraySupported)
+        self.host_register_read_only_supported = bool(prop.hostRegisterReadOnlySupported)
+        self.timeline_semaphore_interop_supported = bool(prop.timelineSemaphoreInteropSupported)
+        self.memory_pools_supported = bool(prop.memoryPoolsSupported)
+        self.gpu_direct_rdma_supported = bool(prop.gpuDirectRDMASupported)
         self.gpu_direct_rdma_flush_writes_options = prop.gpuDirectRDMAFlushWritesOptions
         self.gpu_direct_rdma_writes_ordering = prop.gpuDirectRDMAWritesOrdering
         self.memory_pool_supported_handle_types = prop.memoryPoolSupportedHandleTypes
-        self.deferred_mapping_cuda_array_supported = prop.deferredMappingCudaArraySupported
-        self.ipc_event_supported = prop.ipcEventSupported
-        self.unified_function_pointers = prop.unifiedFunctionPointers
-        self.host_native_atomic_supported = prop.hostNativeAtomicSupported
+        self.deferred_mapping_cuda_array_supported = bool(prop.deferredMappingCudaArraySupported)
+        self.ipc_event_supported = bool(prop.ipcEventSupported)
+        self.unified_function_pointers = bool(prop.unifiedFunctionPointers)
+        self.host_native_atomic_supported = bool(prop.hostNativeAtomicSupported)
         self.luid = prop.luid
         self.luid_device_node_mask = prop.luidDeviceNodeMask
         self.max_blocks_per_multi_processor = prop.maxBlocksPerMultiProcessor
diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py
index 4d7aa2cf..0a441996 100644
--- a/cuda_core/tests/test_device.py
+++ b/cuda_core/tests/test_device.py
@@ -104,8 +104,8 @@ def test_device_property_values():
     ("texture_alignment", int),
     ("texture_pitch_alignment", int),
     ("multi_processor_count", int),
-    ("integrated", int),
-    ("can_map_host_memory", int),
+    ("integrated", bool),
+    ("can_map_host_memory", bool),
     ("max_texture_1d", int),
     ("max_texture_1d_mipmap", int),
     ("max_texture_2d", tuple),
@@ -126,56 +126,56 @@ def test_device_property_values():
     ("max_surface_cubemap", int),
     ("max_surface_cubemap_layered", tuple),
     ("surface_alignment", int),
-    ("concurrent_kernels", int),
-    ("ecc_enabled", int),
+    ("concurrent_kernels", bool),
+    ("ecc_enabled", bool),
     ("pci_bus_id", int),
     ("pci_device_id", int),
     ("pci_domain_id", int),
-    ("tcc_driver", int),
+    ("tcc_driver", bool),
     ("async_engine_count", int),
-    ("unified_addressing", int),
+    ("unified_addressing", bool),
     ("memory_bus_width", int),
     ("l2_cache_size", int),
     ("persisting_l2_cache_max_size", int),
     ("max_threads_per_multi_processor", int),
-    ("stream_priorities_supported", int),
-    ("global_l1_cache_supported", int),
-    ("local_l1_cache_supported", int),
+    ("stream_priorities_supported", bool),
+    ("global_l1_cache_supported", bool),
+    ("local_l1_cache_supported", bool),
     ("shared_mem_per_multiprocessor", int),
     ("regs_per_multiprocessor", int),
-    ("managed_memory", int),
-    ("is_multi_gpu_board", int),
+    ("managed_memory", bool),
+    ("is_multi_gpu_board", bool),
     ("multi_gpu_board_group_id", int),
-    ("pageable_memory_access", int),
-    ("concurrent_managed_access", int),
-    ("compute_preemption_supported", int),
-    ("can_use_host_pointer_for_registered_mem", int),
-    ("cooperative_launch", int),
-    ("pageable_memory_access_uses_host_page_tables", int),
-    ("direct_managed_mem_access_from_host", int),
+    ("pageable_memory_access", bool),
+    ("concurrent_managed_access", bool),
+    ("compute_preemption_supported", bool),
+    ("can_use_host_pointer_for_registered_mem", bool),
+    ("cooperative_launch", bool),
+    ("pageable_memory_access_uses_host_page_tables", bool),
+    ("direct_managed_mem_access_from_host", bool),
     ("access_policy_max_window_size", int),
     ("reserved_shared_mem_per_block", int),
-    ("host_register_supported", int),
-    ("sparse_cuda_array_supported", int),
-    ("host_register_read_only_supported", int),
-    ("timeline_semaphore_interop_supported", int),
-    ("memory_pools_supported", int),
-    ("gpu_direct_rdma_supported", int),
+    ("host_register_supported", bool),
+    ("sparse_cuda_array_supported", bool),
+    ("host_register_read_only_supported", bool),
+    ("timeline_semaphore_interop_supported", bool),
+    ("memory_pools_supported", bool),
+    ("gpu_direct_rdma_supported", bool),
     ("gpu_direct_rdma_flush_writes_options", int),
     ("gpu_direct_rdma_writes_ordering", int),
     ("memory_pool_supported_handle_types", int),
-    ("deferred_mapping_cuda_array_supported", int),
-    ("ipc_event_supported", int),
-    ("unified_function_pointers", int),
+    ("deferred_mapping_cuda_array_supported", bool),
+    ("ipc_event_supported", bool),
+    ("unified_function_pointers", bool),
 ]
 
 cuda_12_properties = [
-    ("host_native_atomic_supported", int),
+    ("host_native_atomic_supported", bool),
     ("luid", bytes),
     ("luid_device_node_mask", int),
     ("max_blocks_per_multi_processor", int),
     ("shared_mem_per_block_optin", int),
-    ("cluster_launch", int),
+    ("cluster_launch", bool),
 ]
 
 

From 373331c21d7fca05409a242d60285b6377ec7208 Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Fri, 17 Jan 2025 16:05:33 -0800
Subject: [PATCH 06/10] convert to bools where appropriate

---
 cuda_core/cuda/core/experimental/_device.py | 81 ++++++++++-----------
 1 file changed, 40 insertions(+), 41 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index 6b7e6ae4..6ff5a8a7 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -28,8 +28,6 @@ class DeviceProperties:
         Total amount of global memory available on the device in bytes.
     shared_mem_per_block : int
         Maximum amount of shared memory available to a thread block in bytes.
-    shared_mem_per_block_optin : int
-        Maximum shared memory per block usable by special opt in
     regs_per_block : int
         Maximum number of 32-bit registers available to a thread block.
     warp_size : int
@@ -46,7 +44,7 @@ class DeviceProperties:
     clock_rate : int
         Clock frequency in kilohertz.
     cluster_launch : bool
-        Indicates device supports cluster launch.
+        Indicates whether or not the device supports cluster launch.
     total_const_mem : int
         Total amount of constant memory available on the device in bytes.
     major : int
@@ -59,16 +57,17 @@ class DeviceProperties:
     texture_pitch_alignment : int
         Pitch alignment requirement for 2D texture references that are bound to pitched memory.
     device_overlap : bool
-        1 if the device can concurrently copy memory between host and device while executing a kernel, or 0 if not.
+        Indicates whether or not the device can concurrently copy memory between host and device while executing
+        a kernel.
     multi_processor_count : int
         Number of multiprocessors on the device.
     kernel_exec_timeout_enabled : bool
-        1 if there is a run time limit for kernels executed on the device, or 0 if not.
+        Indicates whether or not there is a run time limit for kernels executed on the device.
     integrated : bool
-        1 if the device is an integrated (motherboard) GPU and 0 if it is a discrete (card) component.
+        Indicates whether or not the device is an integrated (motherboard) GPU.
     can_map_host_memory : bool
-        1 if the device can map host memory into the CUDA address space for use with
-        cudaHostAlloc()/cudaHostGetDevicePointer(), or 0 if not.
+        Indicates whether or not the device can map host memory into the CUDA address space for use with
+        cudaHostAlloc()/cudaHostGetDevicePointer().
     compute_mode : int
         Compute mode that the device is currently in.
     max_texture_1d : int
@@ -114,10 +113,10 @@ class DeviceProperties:
     surface_alignment : int
         Alignment requirements for surfaces.
     concurrent_kernels : bool
-        1 if the device supports executing multiple kernels within the same context simultaneously, or 0 if
-        not.
+        Indicates whether or not the device supports executing multiple kernels within the same context
+        simultaneously.
     ecc_enabled : bool
-        1 if the device has ECC support turned on, or 0 if not.
+        Indicates whether or not the device has ECC support turned on.
     pci_bus_id : int
         PCI bus identifier of the device.
     pci_device_id : int
@@ -125,13 +124,13 @@ class DeviceProperties:
     pci_domain_id : int
         PCI domain identifier of the device.
     tcc_driver : bool
-        1 if the device is using a TCC driver or 0 if not.
+        Indicates whether or not the device is using a TCC driver.
     async_engine_count : int
         1 when the device can concurrently copy memory between host and device while executing a kernel.
         It is 2 when the device can concurrently copy memory between host and device in both directions
         and execute a kernel at the same time. It is 0 if neither of these is supported.
     unified_addressing : bool
-        1 if the device shares a unified address space with the host and 0 otherwise.
+        Indicates whether or not the device shares a unified address space with the host.
     memory_clock_rate : int
         Peak memory clock frequency in kilohertz.
     memory_bus_width : int
@@ -143,11 +142,11 @@ class DeviceProperties:
     max_threads_per_multi_processor : int
         Number of maximum resident threads per multiprocessor.
     stream_priorities_supported : bool
-        1 if the device supports stream priorities, or 0 if it is not supported.
+        Indicates whether or not the device supports stream priorities.
     global_l1_cache_supported : bool
-        1 if the device supports caching of globals in L1 cache, or 0 if it is not supported.
+        Indicates whether or not the device supports caching of globals in L1 cache.
     local_l1_cache_supported : bool
-        1 if the device supports caching of locals in L1 cache, or 0 if it is not supported.
+        Indicates whether or not the device supports caching of locals in L1 cache.
     shared_mem_per_multiprocessor : int
         Maximum amount of shared memory available to a multiprocessor in bytes; this amount is shared by all
         thread blocks simultaneously resident on a multiprocessor.
@@ -155,9 +154,9 @@ class DeviceProperties:
         Maximum number of 32-bit registers available to a multiprocessor; this number is shared by all thread
         blocks simultaneously resident on a multiprocessor.
     managed_memory : bool
-        1 if the device supports allocating managed memory on this system, or 0 if it is not supported.
+        Indicates whether or not the device supports allocating managed memory on this system.
     is_multi_gpu_board : bool
-        1 if the device is on a multi-GPU board (e.g. Gemini cards), and 0 if not.
+        Indicates whether or not the device is on a multi-GPU board (e.g. Gemini cards).
     multi_gpu_board_group_id : int
         Unique identifier for a group of devices associated with the same board. Devices on the same
         multi-GPU board will share the same identifier.
@@ -165,40 +164,43 @@ class DeviceProperties:
         Ratio of single precision performance (in floating-point operations per second) to double precision
         performance.
     pageable_memory_access : bool
-        1 if the device supports coherently accessing pageable memory without calling cudaHostRegister on it,
-        and 0 otherwise.
+        Indicates whether or not the device supports coherently accessing pageable memory without calling
+        cudaHostRegister on it.
     concurrent_managed_access : bool
-        1 if the device can coherently access managed memory concurrently with the CPU, and 0 otherwise.
+        Indicates whether or not the device can coherently access managed memory concurrently with the CPU.
     compute_preemption_supported : bool
-        1 if the device supports Compute Preemption, and 0 otherwise.
+        Indicates whether or not the device supports Compute Preemption.
     can_use_host_pointer_for_registered_mem : bool
-        1 if the device can access host registered memory at the same virtual address as the CPU, and 0 otherwise.
+        Indicates whether or not the device can access host registered memory at the same virtual address as
+        the CPU.
     cooperative_launch : bool
-        1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernel, and 0 otherwise.
+        Indicates whether or not the device supports launching cooperative kernels via
+        cudaLaunchCooperativeKernel.
     cooperative_multi_device_launch : bool
-        1 if the device supports launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice, and 0
-        otherwise.
+        Indicates whether or not the device supports launching cooperative kernels via
+        cudaLaunchCooperativeKernelMultiDevice.
     pageable_memory_access_uses_host_page_tables : bool
-        1 if the device accesses pageable memory via the host's page tables, and 0 otherwise.
+        Indicates whether or not the device accesses pageable memory via the host's page tables.
     direct_managed_mem_access_from_host : bool
-        1 if the host can directly access managed memory on the device without migration, and 0 otherwise.
+        Indicates whether or not the host can directly access managed memory on the device without migration.
     access_policy_max_window_size : int
         Maximum value of cudaAccessPolicyWindow::num_bytes.
     reserved_shared_mem_per_block : int
         Shared memory reserved by CUDA driver per block in bytes.
     host_register_supported : bool
-        1 if the device supports host memory registration via cudaHostRegister, and 0 otherwise.
+        Indicates whether or not the device supports host memory registration via cudaHostRegister.
     sparse_cuda_array_supported : bool
-        1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise.
+        Indicates whether or not the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays.
     host_register_read_only_supported : bool
-        1 if the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly to register memory
+        Indicates whether or not the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly
+        to register memory
         that must be mapped as read-only to the GPU.
     timeline_semaphore_interop_supported : bool
-        1 if external timeline semaphore interop is supported on the device, 0 otherwise.
+        Indicates whether or not external timeline semaphore interop is supported on the device.
     memory_pools_supported : bool
-        1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise.
+        Indicates whether or not the device supports using the cudaMallocAsync and cudaMemPool family of APIs.
     gpu_direct_rdma_supported : bool
-        1 if the device supports GPUDirect RDMA APIs, 0 otherwise.
+        Indicates whether or not the device supports GPUDirect RDMA APIs.
     gpu_direct_rdma_flush_writes_options : int
         Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum.
     gpu_direct_rdma_writes_ordering : int
@@ -206,13 +208,13 @@ class DeviceProperties:
     memory_pool_supported_handle_types : int
         Bitmask of handle types supported with mempool-based IPC.
     deferred_mapping_cuda_array_supported : bool
-        1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays.
+        Indicates whether or not the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays.
     ipc_event_supported : bool
-        1 if the device supports IPC Events, and 0 otherwise.
+        Indicates whether or not the device supports IPC Events.
     unified_function_pointers : bool
-        1 if the device supports unified pointers, and 0 otherwise.
+        Indicates whether or not the device supports unified pointers.
     host_native_atomic_supported : bool
-        1 if the link between the device and the host supports native atomic operations.
+        Indicates whether or not the link between the device and the host supports native atomic operations.
     luid : bytes
         8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms.
     luid_device_node_mask : int
@@ -230,7 +232,6 @@ def _init(device_id):
         self.uuid = prop.uuid.bytes
         self.total_global_mem = prop.totalGlobalMem
         self.shared_mem_per_block = prop.sharedMemPerBlock
-        self.shared_mem_per_block_optin = prop.sharedMemPerBlockOptin
         self.regs_per_block = prop.regsPerBlock
         self.warp_size = prop.warpSize
         self.mem_pitch = prop.memPitch
@@ -252,7 +253,6 @@ def _init(device_id):
         self.compute_mode = prop.computeMode
         self.max_texture_1d = prop.maxTexture1D
         self.max_texture_1d_mipmap = prop.maxTexture1DMipmap
-        self.max_texture_1d_linear = prop.maxTexture1DLinear
         self.max_texture_2d = tuple(prop.maxTexture2D)
         self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap)
         self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear)
@@ -292,7 +292,6 @@ def _init(device_id):
         self.managed_memory = bool(prop.managedMemory)
         self.is_multi_gpu_board = bool(prop.isMultiGpuBoard)
         self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID
-        self.single_to_double_precision_perf_ratio = prop.singleToDoublePrecisionPerfRatio
         self.pageable_memory_access = bool(prop.pageableMemoryAccess)
         self.concurrent_managed_access = bool(prop.concurrentManagedAccess)
         self.compute_preemption_supported = bool(prop.computePreemptionSupported)

From 9fe5633b22f1192f4c98733ac238a4c35f9d752c Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Wed, 22 Jan 2025 15:06:50 -0800
Subject: [PATCH 07/10] hook into attributes instead of the prop struct

---
 cuda_core/cuda/core/experimental/_device.py | 1307 ++++++++++++++-----
 1 file changed, 1006 insertions(+), 301 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index 6ff5a8a7..37987395 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -14,314 +14,1019 @@
 _tls_lock = threading.Lock()
 
 
+# ruff: noqa
 class DeviceProperties:
     """
-    Represents the properties of a CUDA device.
-
-    Attributes
-    ----------
-    name : str
-        ASCII string identifying the device.
-    uuid : cudaUUID_t
-        16-byte unique identifier.
-    total_global_mem : int
-        Total amount of global memory available on the device in bytes.
-    shared_mem_per_block : int
-        Maximum amount of shared memory available to a thread block in bytes.
-    regs_per_block : int
-        Maximum number of 32-bit registers available to a thread block.
-    warp_size : int
-        Warp size in threads.
-    mem_pitch : int
-        Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through
-        cudaMallocPitch().
-    max_threads_per_block : int
-        Maximum number of threads per block.
-    max_threads_dim : tuple
-        Maximum size of each dimension of a block.
-    max_grid_size : tuple
-        Maximum size of each dimension of a grid.
-    clock_rate : int
-        Clock frequency in kilohertz.
-    cluster_launch : bool
-        Indicates whether or not the device supports cluster launch.
-    total_const_mem : int
-        Total amount of constant memory available on the device in bytes.
-    major : int
-        Major revision number defining the device's compute capability.
-    minor : int
-        Minor revision number defining the device's compute capability.
-    texture_alignment : int
-        Alignment requirement; texture base addresses that are aligned to textureAlignment bytes do not need an
-        offset applied to texture fetches.
-    texture_pitch_alignment : int
-        Pitch alignment requirement for 2D texture references that are bound to pitched memory.
-    device_overlap : bool
-        Indicates whether or not the device can concurrently copy memory between host and device while executing
-        a kernel.
-    multi_processor_count : int
-        Number of multiprocessors on the device.
-    kernel_exec_timeout_enabled : bool
-        Indicates whether or not there is a run time limit for kernels executed on the device.
-    integrated : bool
-        Indicates whether or not the device is an integrated (motherboard) GPU.
-    can_map_host_memory : bool
-        Indicates whether or not the device can map host memory into the CUDA address space for use with
-        cudaHostAlloc()/cudaHostGetDevicePointer().
-    compute_mode : int
-        Compute mode that the device is currently in.
-    max_texture_1d : int
-        Maximum 1D texture size.
-    max_texture_1d_mipmap : int
-        Maximum 1D mipmapped texture size.
-    max_texture_1d_linear : int
-        Maximum 1D texture size for textures bound to linear memory.
-    max_texture_2d : tuple
-        Maximum 2D texture dimensions.
-    max_texture_2d_mipmap : tuple
-        Maximum 2D mipmapped texture dimensions.
-    max_texture_2d_linear : tuple
-        Maximum 2D texture dimensions for 2D textures bound to pitch linear memory.
-    max_texture_2d_gather : tuple
-        Maximum 2D texture dimensions if texture gather operations have to be performed.
-    max_texture_3d : tuple
-        Maximum 3D texture dimensions.
-    max_texture_3d_alt : tuple
-        Maximum alternate 3D texture dimensions.
-    max_texture_cubemap : int
-        Maximum cubemap texture width or height.
-    max_texture_1d_layered : tuple
-        Maximum 1D layered texture dimensions.
-    max_texture_2d_layered : tuple
-        Maximum 2D layered texture dimensions.
-    max_texture_cubemap_layered : tuple
-        Maximum cubemap layered texture dimensions.
-    max_surface_1d : int
-        Maximum 1D surface size.
-    max_surface_2d : tuple
-        Maximum 2D surface dimensions.
-    max_surface_3d : tuple
-        Maximum 3D surface dimensions.
-    max_surface_1d_layered : tuple
-        Maximum 1D layered surface dimensions.
-    max_surface_2d_layered : tuple
-        Maximum 2D layered surface dimensions.
-    max_surface_cubemap : int
-        Maximum cubemap surface width or height.
-    max_surface_cubemap_layered : tuple
-        Maximum cubemap layered surface dimensions.
-    surface_alignment : int
-        Alignment requirements for surfaces.
-    concurrent_kernels : bool
-        Indicates whether or not the device supports executing multiple kernels within the same context
-        simultaneously.
-    ecc_enabled : bool
-        Indicates whether or not the device has ECC support turned on.
-    pci_bus_id : int
-        PCI bus identifier of the device.
-    pci_device_id : int
-        PCI device (sometimes called slot) identifier of the device.
-    pci_domain_id : int
-        PCI domain identifier of the device.
-    tcc_driver : bool
-        Indicates whether or not the device is using a TCC driver.
-    async_engine_count : int
-        1 when the device can concurrently copy memory between host and device while executing a kernel.
-        It is 2 when the device can concurrently copy memory between host and device in both directions
-        and execute a kernel at the same time. It is 0 if neither of these is supported.
-    unified_addressing : bool
-        Indicates whether or not the device shares a unified address space with the host.
-    memory_clock_rate : int
-        Peak memory clock frequency in kilohertz.
-    memory_bus_width : int
-        Memory bus width in bits.
-    l2_cache_size : int
-        L2 cache size in bytes.
-    persisting_l2_cache_max_size : int
-        L2 cache's maximum persisting lines size in bytes.
-    max_threads_per_multi_processor : int
-        Number of maximum resident threads per multiprocessor.
-    stream_priorities_supported : bool
-        Indicates whether or not the device supports stream priorities.
-    global_l1_cache_supported : bool
-        Indicates whether or not the device supports caching of globals in L1 cache.
-    local_l1_cache_supported : bool
-        Indicates whether or not the device supports caching of locals in L1 cache.
-    shared_mem_per_multiprocessor : int
-        Maximum amount of shared memory available to a multiprocessor in bytes; this amount is shared by all
-        thread blocks simultaneously resident on a multiprocessor.
-    regs_per_multiprocessor : int
-        Maximum number of 32-bit registers available to a multiprocessor; this number is shared by all thread
-        blocks simultaneously resident on a multiprocessor.
-    managed_memory : bool
-        Indicates whether or not the device supports allocating managed memory on this system.
-    is_multi_gpu_board : bool
-        Indicates whether or not the device is on a multi-GPU board (e.g. Gemini cards).
-    multi_gpu_board_group_id : int
-        Unique identifier for a group of devices associated with the same board. Devices on the same
-        multi-GPU board will share the same identifier.
-    single_to_double_precision_perf_ratio : int
-        Ratio of single precision performance (in floating-point operations per second) to double precision
-        performance.
-    pageable_memory_access : bool
-        Indicates whether or not the device supports coherently accessing pageable memory without calling
-        cudaHostRegister on it.
-    concurrent_managed_access : bool
-        Indicates whether or not the device can coherently access managed memory concurrently with the CPU.
-    compute_preemption_supported : bool
-        Indicates whether or not the device supports Compute Preemption.
-    can_use_host_pointer_for_registered_mem : bool
-        Indicates whether or not the device can access host registered memory at the same virtual address as
-        the CPU.
-    cooperative_launch : bool
-        Indicates whether or not the device supports launching cooperative kernels via
-        cudaLaunchCooperativeKernel.
-    cooperative_multi_device_launch : bool
-        Indicates whether or not the device supports launching cooperative kernels via
-        cudaLaunchCooperativeKernelMultiDevice.
-    pageable_memory_access_uses_host_page_tables : bool
-        Indicates whether or not the device accesses pageable memory via the host's page tables.
-    direct_managed_mem_access_from_host : bool
-        Indicates whether or not the host can directly access managed memory on the device without migration.
-    access_policy_max_window_size : int
-        Maximum value of cudaAccessPolicyWindow::num_bytes.
-    reserved_shared_mem_per_block : int
-        Shared memory reserved by CUDA driver per block in bytes.
-    host_register_supported : bool
-        Indicates whether or not the device supports host memory registration via cudaHostRegister.
-    sparse_cuda_array_supported : bool
-        Indicates whether or not the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays.
-    host_register_read_only_supported : bool
-        Indicates whether or not the device supports using the cudaHostRegister flag cudaHostRegisterReadOnly
-        to register memory
-        that must be mapped as read-only to the GPU.
-    timeline_semaphore_interop_supported : bool
-        Indicates whether or not external timeline semaphore interop is supported on the device.
-    memory_pools_supported : bool
-        Indicates whether or not the device supports using the cudaMallocAsync and cudaMemPool family of APIs.
-    gpu_direct_rdma_supported : bool
-        Indicates whether or not the device supports GPUDirect RDMA APIs.
-    gpu_direct_rdma_flush_writes_options : int
-        Bitmask to be interpreted according to the cudaFlushGPUDirectRDMAWritesOptions enum.
-    gpu_direct_rdma_writes_ordering : int
-        See the cudaGPUDirectRDMAWritesOrdering enum for numerical values.
-    memory_pool_supported_handle_types : int
-        Bitmask of handle types supported with mempool-based IPC.
-    deferred_mapping_cuda_array_supported : bool
-        Indicates whether or not the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays.
-    ipc_event_supported : bool
-        Indicates whether or not the device supports IPC Events.
-    unified_function_pointers : bool
-        Indicates whether or not the device supports unified pointers.
-    host_native_atomic_supported : bool
-        Indicates whether or not the link between the device and the host supports native atomic operations.
-    luid : bytes
-        8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms.
-    luid_device_node_mask : int
-        LUID device node mask. Value is undefined on TCC and non-Windows platforms.
-    max_blocks_per_multi_processor : int
-        Maximum number of resident blocks per multiprocessor.
+    A class to query various attributes of a CUDA device.
+
+    Attributes are read-only and provide information about the device.
+
+    Attributes:
+        max_threads_per_block (int): Maximum number of threads per block.
+        max_block_dim_x (int): Maximum x-dimension of a block.
+        max_block_dim_y (int): Maximum y-dimension of a block.
+        max_block_dim_z (int): Maximum z-dimension of a block.
+        max_grid_dim_x (int): Maximum x-dimension of a grid.
+        max_grid_dim_y (int): Maximum y-dimension of a grid.
+        max_grid_dim_z (int): Maximum z-dimension of a grid.
+        max_shared_memory_per_block (int): Maximum amount of shared memory available to a thread block in bytes.
+        total_constant_memory (int): Memory available on device for __constant__ variables in a CUDA C kernel in bytes.
+        warp_size (int): Warp size in threads.
+        max_pitch (int): Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through cuMemAllocPitch().
+        maximum_texture1d_width (int): Maximum 1D texture width.
+        maximum_texture1d_linear_width (int): Maximum width for a 1D texture bound to linear memory.
+        maximum_texture1d_mipmapped_width (int): Maximum mipmapped 1D texture width.
+        maximum_texture2d_width (int): Maximum 2D texture width.
+        maximum_texture2d_height (int): Maximum 2D texture height.
+        maximum_texture2d_linear_width (int): Maximum width for a 2D texture bound to linear memory.
+        maximum_texture2d_linear_height (int): Maximum height for a 2D texture bound to linear memory.
+        maximum_texture2d_linear_pitch (int): Maximum pitch in bytes for a 2D texture bound to linear memory.
+        maximum_texture2d_mipmapped_width (int): Maximum mipmapped 2D texture width.
+        maximum_texture2d_mipmapped_height (int): Maximum mipmapped 2D texture height.
+        maximum_texture3d_width (int): Maximum 3D texture width.
+        maximum_texture3d_height (int): Maximum 3D texture height.
+        maximum_texture3d_depth (int): Maximum 3D texture depth.
+        maximum_texture3d_width_alternate (int): Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported.
+        maximum_texture3d_height_alternate (int): Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported.
+        maximum_texture3d_depth_alternate (int): Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported.
+        maximum_texturecubemap_width (int): Maximum cubemap texture width or height.
+        maximum_texture1d_layered_width (int): Maximum 1D layered texture width.
+        maximum_texture1d_layered_layers (int): Maximum layers in a 1D layered texture.
+        maximum_texture2d_layered_width (int): Maximum 2D layered texture width.
+        maximum_texture2d_layered_height (int): Maximum 2D layered texture height.
+        maximum_texture2d_layered_layers (int): Maximum layers in a 2D layered texture.
+        maximum_texturecubemap_layered_width (int): Maximum cubemap layered texture width or height.
+        maximum_texturecubemap_layered_layers (int): Maximum layers in a cubemap layered texture.
+        maximum_surface1d_width (int): Maximum 1D surface width.
+        maximum_surface2d_width (int): Maximum 2D surface width.
+        maximum_surface2d_height (int): Maximum 2D surface height.
+        maximum_surface3d_width (int): Maximum 3D surface width.
+        maximum_surface3d_height (int): Maximum 3D surface height.
+        maximum_surface3d_depth (int): Maximum 3D surface depth.
+        maximum_surface1d_layered_width (int): Maximum 1D layered surface width.
+        maximum_surface1d_layered_layers (int): Maximum layers in a 1D layered surface.
+        maximum_surface2d_layered_width (int): Maximum 2D layered surface width.
+        maximum_surface2d_layered_height (int): Maximum 2D layered surface height.
+        maximum_surface2d_layered_layers (int): Maximum layers in a 2D layered surface.
+        maximum_surfacecubemap_width (int): Maximum cubemap surface width.
+        maximum_surfacecubemap_layered_width (int): Maximum cubemap layered surface width.
+        maximum_surfacecubemap_layered_layers (int): Maximum layers in a cubemap layered surface.
+        max_registers_per_block (int): Maximum number of 32-bit registers available to a thread block.
+        clock_rate (int): The typical clock frequency in kilohertz.
+        texture_alignment (int): Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset applied to texture fetches.
+        texture_pitch_alignment (int): Pitch alignment requirement for 2D texture references bound to pitched memory.
+        gpu_overlap (bool): True if the device can concurrently copy memory between host and device while executing a kernel, False if not.
+        multiprocessor_count (int): Number of multiprocessors on the device.
+        kernel_exec_timeout (bool): True if there is a run time limit for kernels executed on the device, False if not.
+        integrated (bool): True if the device is integrated with the memory subsystem, False if not.
+        can_map_host_memory (bool): True if the device can map host memory into the CUDA address space, False if not.
+        compute_mode (int): Compute mode that device is currently in.
+        concurrent_kernels (bool): True if the device supports executing multiple kernels within the same context simultaneously, False if not.
+        ecc_enabled (bool): True if error correction is enabled on the device, False if error correction is disabled or not supported by the device.
+        pci_bus_id (int): PCI bus identifier of the device.
+        pci_device_id (int): PCI device (also known as slot) identifier of the device.
+        pci_domain_id (int): PCI domain identifier of the device.
+        tcc_driver (bool): True if the device is using a TCC driver, False if not.
+        memory_clock_rate (int): Peak memory clock frequency in kilohertz.
+        global_memory_bus_width (int): Global memory bus width in bits.
+        l2_cache_size (int): Size of L2 cache in bytes, 0 if the device doesn't have L2 cache.
+        max_threads_per_multiprocessor (int): Maximum resident threads per multiprocessor.
+        unified_addressing (bool): True if the device shares a unified address space with the host, False if not.
+        compute_capability_major (int): Major compute capability version number.
+        compute_capability_minor (int): Minor compute capability version number.
+        global_l1_cache_supported (bool): True if device supports caching globals in L1 cache, False if caching globals in L1 cache is not supported by the device.
+        local_l1_cache_supported (bool): True if device supports caching locals in L1 cache, False if caching locals in L1 cache is not supported by the device.
+        max_shared_memory_per_multiprocessor (int): Maximum amount of shared memory available to a multiprocessor in bytes.
+        max_registers_per_multiprocessor (int): Maximum number of 32-bit registers available to a multiprocessor.
+        managed_memory (bool): True if device supports allocating managed memory on this system, False if allocating managed memory is not supported by the device on this system.
+        multi_gpu_board (bool): True if device is on a multi-GPU board, False if not.
+        multi_gpu_board_group_id (int): Unique identifier for a group of devices associated with the same board.
+        host_native_atomic_supported (bool): True if Link between the device and the host supports native atomic operations, False if not.
+        single_to_double_precision_perf_ratio (int): Ratio of single precision performance (in floating-point operations per second) to double precision performance.
+        pageable_memory_access (bool): True if device supports coherently accessing pageable memory without calling cudaHostRegister on it, False if not.
+        concurrent_managed_access (bool): True if device can coherently access managed memory concurrently with the CPU, False if not.
+        compute_preemption_supported (bool): True if device supports Compute Preemption, False if not.
+        can_use_host_pointer_for_registered_mem (bool): True if device can access host registered memory at the same virtual address as the CPU, False if not.
+        max_shared_memory_per_block_optin (int): The maximum per block shared memory size supported on this device.
+        pageable_memory_access_uses_host_page_tables (bool): True if device accesses pageable memory via the host's page tables, False if not.
+        direct_managed_mem_access_from_host (bool): True if the host can directly access managed memory on the device without migration, False if not.
+        virtual_memory_management_supported (bool): True if device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap and related APIs, False if not.
+        handle_type_posix_file_descriptor_supported (bool): True if device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, False if not.
+        handle_type_win32_handle_supported (bool): True if device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, False if not.
+        handle_type_win32_kmt_handle_supported (bool): True if device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, False if not.
+        max_blocks_per_multiprocessor (int): Maximum number of thread blocks that can reside on a multiprocessor.
+        generic_compression_supported (bool): True if device supports compressible memory allocation via cuMemCreate, False if not.
+        max_persisting_l2_cache_size (int): Maximum L2 persisting lines capacity setting in bytes.
+        max_access_policy_window_size (int): Maximum value of CUaccessPolicyWindow::num_bytes.
+        gpu_direct_rdma_with_cuda_vmm_supported (bool): True if device supports specifying the GPUDirect RDMA flag with cuMemCreate, False if not.
+        reserved_shared_memory_per_block (int): Amount of shared memory per block reserved by CUDA driver in bytes.
+        sparse_cuda_array_supported (bool): True if device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, False if not.
+        read_only_host_register_supported (bool): True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not.
+        memory_pools_supported (bool): True if device supports using the cuMemAllocAsync and cuMemPool family of APIs, False if not.
+        gpu_direct_rdma_supported (bool): True if device supports GPUDirect RDMA APIs, False if not.
+        gpu_direct_rdma_flush_writes_options (int): The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the CUflushGPUDirectRDMAWritesOptions enum.
+        gpu_direct_rdma_writes_ordering (int): GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute.
+        mempool_supported_handle_types (int): Bitmask of handle types supported with mempool based IPC.
+        deferred_mapping_cuda_array_supported (bool): True if device supports deferred mapping CUDA arrays and CUDA mipmapped arrays, False if not.
+        numa_config (int): NUMA configuration of a device.
+        numa_id (int): NUMA node ID of the GPU memory.
+        multicast_supported (bool): True if device supports switch multicast and reduction operations, False if not.
     """
 
-    def _init(device_id):
-        self = DeviceProperties.__new__(DeviceProperties)
+    def __init__(self):
+        raise RuntimeError("DeviceProperties should not be instantiated directly")
+
+    slots = "_handle"
 
-        prop = handle_return(runtime.cudaGetDeviceProperties(device_id))
-
-        self.name = prop.name.decode("utf-8")
-        self.uuid = prop.uuid.bytes
-        self.total_global_mem = prop.totalGlobalMem
-        self.shared_mem_per_block = prop.sharedMemPerBlock
-        self.regs_per_block = prop.regsPerBlock
-        self.warp_size = prop.warpSize
-        self.mem_pitch = prop.memPitch
-        self.max_threads_per_block = prop.maxThreadsPerBlock
-        self.max_threads_dim = tuple(prop.maxThreadsDim)
-        self.max_grid_size = tuple(prop.maxGridSize)
-        self.clock_rate = prop.clockRate
-        self.cluster_launch = bool(prop.clusterLaunch)
-        self.total_const_mem = prop.totalConstMem
-        self.major = prop.major
-        self.minor = prop.minor
-        self.texture_alignment = prop.textureAlignment
-        self.texture_pitch_alignment = prop.texturePitchAlignment
-        self.device_overlap = bool(prop.deviceOverlap)
-        self.multi_processor_count = prop.multiProcessorCount
-        self.kernel_exec_timeout_enabled = bool(prop.kernelExecTimeoutEnabled)
-        self.integrated = bool(prop.integrated)
-        self.can_map_host_memory = bool(prop.canMapHostMemory)
-        self.compute_mode = prop.computeMode
-        self.max_texture_1d = prop.maxTexture1D
-        self.max_texture_1d_mipmap = prop.maxTexture1DMipmap
-        self.max_texture_2d = tuple(prop.maxTexture2D)
-        self.max_texture_2d_mipmap = tuple(prop.maxTexture2DMipmap)
-        self.max_texture_2d_linear = tuple(prop.maxTexture2DLinear)
-        self.max_texture_2d_gather = tuple(prop.maxTexture2DGather)
-        self.max_texture_3d = tuple(prop.maxTexture3D)
-        self.max_texture_3d_alt = tuple(prop.maxTexture3DAlt)
-        self.max_texture_cubemap = prop.maxTextureCubemap
-        self.max_texture_1d_layered = tuple(prop.maxTexture1DLayered)
-        self.max_texture_2d_layered = tuple(prop.maxTexture2DLayered)
-        self.max_texture_cubemap_layered = tuple(prop.maxTextureCubemapLayered)
-        self.max_surface_1d = prop.maxSurface1D
-        self.max_surface_2d = tuple(prop.maxSurface2D)
-        self.max_surface_3d = tuple(prop.maxSurface3D)
-        self.max_surface_1d_layered = tuple(prop.maxSurface1DLayered)
-        self.max_surface_2d_layered = tuple(prop.maxSurface2DLayered)
-        self.max_surface_cubemap = prop.maxSurfaceCubemap
-        self.max_surface_cubemap_layered = tuple(prop.maxSurfaceCubemapLayered)
-        self.surface_alignment = prop.surfaceAlignment
-        self.concurrent_kernels = bool(prop.concurrentKernels)
-        self.ecc_enabled = bool(prop.ECCEnabled)
-        self.pci_bus_id = prop.pciBusID
-        self.pci_device_id = prop.pciDeviceID
-        self.pci_domain_id = prop.pciDomainID
-        self.tcc_driver = bool(prop.tccDriver)
-        self.async_engine_count = prop.asyncEngineCount
-        self.unified_addressing = bool(prop.unifiedAddressing)
-        self.memory_clock_rate = prop.memoryClockRate
-        self.memory_bus_width = prop.memoryBusWidth
-        self.l2_cache_size = prop.l2CacheSize
-        self.persisting_l2_cache_max_size = prop.persistingL2CacheMaxSize
-        self.max_threads_per_multi_processor = prop.maxThreadsPerMultiProcessor
-        self.stream_priorities_supported = bool(prop.streamPrioritiesSupported)
-        self.global_l1_cache_supported = bool(prop.globalL1CacheSupported)
-        self.local_l1_cache_supported = bool(prop.localL1CacheSupported)
-        self.shared_mem_per_multiprocessor = prop.sharedMemPerMultiprocessor
-        self.regs_per_multiprocessor = prop.regsPerMultiprocessor
-        self.managed_memory = bool(prop.managedMemory)
-        self.is_multi_gpu_board = bool(prop.isMultiGpuBoard)
-        self.multi_gpu_board_group_id = prop.multiGpuBoardGroupID
-        self.pageable_memory_access = bool(prop.pageableMemoryAccess)
-        self.concurrent_managed_access = bool(prop.concurrentManagedAccess)
-        self.compute_preemption_supported = bool(prop.computePreemptionSupported)
-        self.can_use_host_pointer_for_registered_mem = bool(prop.canUseHostPointerForRegisteredMem)
-        self.cooperative_launch = bool(prop.cooperativeLaunch)
-        self.cooperative_multi_device_launch = bool(prop.cooperativeMultiDeviceLaunch)
-        self.pageable_memory_access_uses_host_page_tables = bool(prop.pageableMemoryAccessUsesHostPageTables)
-        self.direct_managed_mem_access_from_host = bool(prop.directManagedMemAccessFromHost)
-        self.access_policy_max_window_size = prop.accessPolicyMaxWindowSize
-        self.reserved_shared_mem_per_block = prop.reservedSharedMemPerBlock
-        self.host_register_supported = bool(prop.hostRegisterSupported)
-        self.sparse_cuda_array_supported = bool(prop.sparseCudaArraySupported)
-        self.host_register_read_only_supported = bool(prop.hostRegisterReadOnlySupported)
-        self.timeline_semaphore_interop_supported = bool(prop.timelineSemaphoreInteropSupported)
-        self.memory_pools_supported = bool(prop.memoryPoolsSupported)
-        self.gpu_direct_rdma_supported = bool(prop.gpuDirectRDMASupported)
-        self.gpu_direct_rdma_flush_writes_options = prop.gpuDirectRDMAFlushWritesOptions
-        self.gpu_direct_rdma_writes_ordering = prop.gpuDirectRDMAWritesOrdering
-        self.memory_pool_supported_handle_types = prop.memoryPoolSupportedHandleTypes
-        self.deferred_mapping_cuda_array_supported = bool(prop.deferredMappingCudaArraySupported)
-        self.ipc_event_supported = bool(prop.ipcEventSupported)
-        self.unified_function_pointers = bool(prop.unifiedFunctionPointers)
-        self.host_native_atomic_supported = bool(prop.hostNativeAtomicSupported)
-        self.luid = prop.luid
-        self.luid_device_node_mask = prop.luidDeviceNodeMask
-        self.max_blocks_per_multi_processor = prop.maxBlocksPerMultiProcessor
+    def _init(handle):
+        self = DeviceProperties.__new__(DeviceProperties)
+        self._handle = handle
         return self
 
-    def __init__(self, device_id):
-        raise RuntimeError("DeviceProperties should not be instantiated directly")
+    @property
+    def max_threads_per_block(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, self._handle
+            )
+        )
+
+    @property
+    def max_block_dim_x(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, self._handle)
+        )
+
+    @property
+    def max_block_dim_y(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, self._handle)
+        )
+
+    @property
+    def max_block_dim_z(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, self._handle)
+        )
+
+    @property
+    def max_grid_dim_x(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, self._handle)
+        )
+
+    @property
+    def max_grid_dim_y(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, self._handle)
+        )
+
+    @property
+    def max_grid_dim_z(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, self._handle)
+        )
+
+    @property
+    def max_shared_memory_per_block(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, self._handle
+            )
+        )
+
+    @property
+    def total_constant_memory(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, self._handle
+            )
+        )
+
+    @property
+    def warp_size(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE, self._handle)
+        )
+
+    @property
+    def max_pitch(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH, self._handle)
+        )
+
+    @property
+    def maximum_texture1d_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture1d_linear_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture1d_mipmapped_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_height(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_linear_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_linear_height(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_linear_pitch(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_mipmapped_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_mipmapped_height(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture3d_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture3d_height(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture3d_depth(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture3d_width_alternate(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture3d_height_alternate(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture3d_depth_alternate(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE, self._handle
+            )
+        )
+
+    @property
+    def maximum_texturecubemap_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture1d_layered_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture1d_layered_layers(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_layered_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_layered_height(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, self._handle
+            )
+        )
+
+    @property
+    def maximum_texture2d_layered_layers(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS, self._handle
+            )
+        )
+
+    @property
+    def maximum_texturecubemap_layered_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_texturecubemap_layered_layers(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface1d_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface2d_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface2d_height(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface3d_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface3d_height(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface3d_depth(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface1d_layered_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface1d_layered_layers(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface2d_layered_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface2d_layered_height(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT, self._handle
+            )
+        )
+
+    @property
+    def maximum_surface2d_layered_layers(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS, self._handle
+            )
+        )
+
+    @property
+    def maximum_surfacecubemap_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_surfacecubemap_layered_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def maximum_surfacecubemap_layered_layers(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS, self._handle
+            )
+        )
+
+    @property
+    def max_registers_per_block(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, self._handle
+            )
+        )
+
+    @property
+    def clock_rate(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, self._handle)
+        )
+
+    @property
+    def texture_alignment(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, self._handle)
+        )
+
+    @property
+    def texture_pitch_alignment(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, self._handle
+            )
+        )
+
+    @property
+    def gpu_overlap(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, self._handle)
+            )
+        )
+
+    @property
+    def multiprocessor_count(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, self._handle
+            )
+        )
+
+    @property
+    def kernel_exec_timeout(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, self._handle
+                )
+            )
+        )
+
+    @property
+    def integrated(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED, self._handle)
+            )
+        )
+
+    @property
+    def can_map_host_memory(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, self._handle
+                )
+            )
+        )
+
+    @property
+    def compute_mode(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, self._handle)
+        )
+
+    @property
+    def concurrent_kernels(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, self._handle
+                )
+            )
+        )
+
+    @property
+    def ecc_enabled(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED, self._handle)
+            )
+        )
+
+    @property
+    def pci_bus_id(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, self._handle)
+        )
+
+    @property
+    def pci_device_id(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, self._handle)
+        )
+
+    @property
+    def pci_domain_id(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, self._handle)
+        )
+
+    @property
+    def tcc_driver(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER, self._handle)
+            )
+        )
+
+    @property
+    def memory_clock_rate(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, self._handle)
+        )
+
+    @property
+    def global_memory_bus_width(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, self._handle
+            )
+        )
+
+    @property
+    def l2_cache_size(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, self._handle)
+        )
+
+    @property
+    def max_threads_per_multiprocessor(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, self._handle
+            )
+        )
+
+    @property
+    def unified_addressing(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, self._handle
+                )
+            )
+        )
+
+    @property
+    def compute_capability_major(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, self._handle
+            )
+        )
+
+    @property
+    def compute_capability_minor(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, self._handle
+            )
+        )
+
+    @property
+    def global_l1_cache_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def local_l1_cache_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def max_shared_memory_per_multiprocessor(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, self._handle
+            )
+        )
+
+    @property
+    def max_registers_per_multiprocessor(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, self._handle
+            )
+        )
+
+    @property
+    def managed_memory(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, self._handle)
+            )
+        )
+
+    @property
+    def multi_gpu_board(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, self._handle)
+            )
+        )
+
+    @property
+    def multi_gpu_board_group_id(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID, self._handle
+            )
+        )
+
+    @property
+    def host_native_atomic_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def single_to_double_precision_perf_ratio(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, self._handle
+            )
+        )
+
+    @property
+    def pageable_memory_access(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, self._handle
+                )
+            )
+        )
+
+    @property
+    def concurrent_managed_access(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, self._handle
+                )
+            )
+        )
+
+    @property
+    def compute_preemption_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def can_use_host_pointer_for_registered_mem(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, self._handle
+                )
+            )
+        )
+
+    @property
+    def max_shared_memory_per_block_optin(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, self._handle
+            )
+        )
+
+    @property
+    def pageable_memory_access_uses_host_page_tables(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
+                    self._handle,
+                )
+            )
+        )
+
+    @property
+    def direct_managed_mem_access_from_host(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST, self._handle
+                )
+            )
+        )
+
+    @property
+    def virtual_memory_management_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def handle_type_posix_file_descriptor_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED,
+                    self._handle,
+                )
+            )
+        )
+
+    @property
+    def handle_type_win32_handle_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def handle_type_win32_kmt_handle_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def max_blocks_per_multiprocessor(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR, self._handle
+            )
+        )
+
+    @property
+    def generic_compression_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def max_persisting_l2_cache_size(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE, self._handle
+            )
+        )
+
+    @property
+    def max_access_policy_window_size(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE, self._handle
+            )
+        )
+
+    @property
+    def gpu_direct_rdma_with_cuda_vmm_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def reserved_shared_memory_per_block(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK, self._handle
+            )
+        )
+
+    @property
+    def sparse_cuda_array_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def read_only_host_register_supported(self):
+        return bool(handle_return(driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY)))
+
+    @property
+    def memory_pools_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def gpu_direct_rdma_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def gpu_direct_rdma_flush_writes_options(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, self._handle
+            )
+        )
+
+    @property
+    def gpu_direct_rdma_writes_ordering(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, self._handle
+            )
+        )
+
+    @property
+    def mempool_supported_handle_types(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES, self._handle
+            )
+        )
+
+    @property
+    def deferred_mapping_cuda_array_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED, self._handle
+                )
+            )
+        )
+
+    @property
+    def numa_config(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG, self._handle)
+        )
+
+    @property
+    def numa_id(self):
+        return handle_return(
+            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID, self._handle)
+        )
+
+    @property
+    def multicast_supported(self):
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED, self._handle
+                )
+            )
+        )
+
+
+# ruff: enable
 
 
 class Device:

From 570ea62d6490a4ba397ab2ccd909ff6510c26ed8 Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Thu, 23 Jan 2025 19:05:17 -0800
Subject: [PATCH 08/10] update test

---
 cuda_core/cuda/core/experimental/_device.py |   8 +-
 cuda_core/tests/test_device.py              | 152 +++++++++++---------
 2 files changed, 88 insertions(+), 72 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index 37987395..df1da741 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -947,7 +947,13 @@ def sparse_cuda_array_supported(self):
 
     @property
     def read_only_host_register_supported(self):
-        return bool(handle_return(driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY)))
+        return bool(
+            handle_return(
+                driver.cuDeviceGetAttribute(
+                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED, self._handle
+                )
+            )
+        )
 
     @property
     def memory_pools_supported(self):
diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py
index 0a441996..30287a38 100644
--- a/cuda_core/tests/test_device.py
+++ b/cuda_core/tests/test_device.py
@@ -81,109 +81,119 @@ def test_compute_capability():
     assert device.compute_capability == expected_cc
 
 
-def test_device_property_values():
-    device = Device()
-    assert device.properties.name == device.name
-    assert device.properties.uuid.hex() == device.uuid.replace("-", "")
-
-
 cuda_base_properties = [
-    ("name", str),
-    ("uuid", bytes),
-    ("total_global_mem", int),
-    ("shared_mem_per_block", int),
-    ("regs_per_block", int),
-    ("warp_size", int),
-    ("mem_pitch", int),
     ("max_threads_per_block", int),
-    ("max_threads_dim", tuple),
-    ("max_grid_size", tuple),
-    ("total_const_mem", int),
-    ("major", int),
-    ("minor", int),
+    ("max_block_dim_x", int),
+    ("max_block_dim_y", int),
+    ("max_block_dim_z", int),
+    ("max_grid_dim_x", int),
+    ("max_grid_dim_y", int),
+    ("max_grid_dim_z", int),
+    ("max_shared_memory_per_block", int),
+    ("total_constant_memory", int),
+    ("warp_size", int),
+    ("max_pitch", int),
+    ("maximum_texture1d_width", int),
+    ("maximum_texture1d_linear_width", int),
+    ("maximum_texture1d_mipmapped_width", int),
+    ("maximum_texture2d_width", int),
+    ("maximum_texture2d_height", int),
+    ("maximum_texture2d_linear_width", int),
+    ("maximum_texture2d_linear_height", int),
+    ("maximum_texture2d_linear_pitch", int),
+    ("maximum_texture2d_mipmapped_width", int),
+    ("maximum_texture2d_mipmapped_height", int),
+    ("maximum_texture3d_width", int),
+    ("maximum_texture3d_height", int),
+    ("maximum_texture3d_depth", int),
+    ("maximum_texture3d_width_alternate", int),
+    ("maximum_texture3d_height_alternate", int),
+    ("maximum_texture3d_depth_alternate", int),
+    ("maximum_texturecubemap_width", int),
+    ("maximum_texture1d_layered_width", int),
+    ("maximum_texture1d_layered_layers", int),
+    ("maximum_texture2d_layered_width", int),
+    ("maximum_texture2d_layered_height", int),
+    ("maximum_texture2d_layered_layers", int),
+    ("maximum_texturecubemap_layered_width", int),
+    ("maximum_texturecubemap_layered_layers", int),
+    ("maximum_surface1d_width", int),
+    ("maximum_surface2d_width", int),
+    ("maximum_surface2d_height", int),
+    ("maximum_surface3d_width", int),
+    ("maximum_surface3d_height", int),
+    ("maximum_surface3d_depth", int),
+    ("maximum_surface1d_layered_width", int),
+    ("maximum_surface1d_layered_layers", int),
+    ("maximum_surface2d_layered_width", int),
+    ("maximum_surface2d_layered_height", int),
+    ("maximum_surface2d_layered_layers", int),
+    ("maximum_surfacecubemap_width", int),
+    ("maximum_surfacecubemap_layered_width", int),
+    ("maximum_surfacecubemap_layered_layers", int),
+    ("max_registers_per_block", int),
+    ("clock_rate", int),
     ("texture_alignment", int),
     ("texture_pitch_alignment", int),
-    ("multi_processor_count", int),
+    ("gpu_overlap", bool),
+    ("multiprocessor_count", int),
+    ("kernel_exec_timeout", bool),
     ("integrated", bool),
     ("can_map_host_memory", bool),
-    ("max_texture_1d", int),
-    ("max_texture_1d_mipmap", int),
-    ("max_texture_2d", tuple),
-    ("max_texture_2d_mipmap", tuple),
-    ("max_texture_2d_linear", tuple),
-    ("max_texture_2d_gather", tuple),
-    ("max_texture_3d", tuple),
-    ("max_texture_3d_alt", tuple),
-    ("max_texture_cubemap", int),
-    ("max_texture_1d_layered", tuple),
-    ("max_texture_2d_layered", tuple),
-    ("max_texture_cubemap_layered", tuple),
-    ("max_surface_1d", int),
-    ("max_surface_2d", tuple),
-    ("max_surface_3d", tuple),
-    ("max_surface_1d_layered", tuple),
-    ("max_surface_2d_layered", tuple),
-    ("max_surface_cubemap", int),
-    ("max_surface_cubemap_layered", tuple),
-    ("surface_alignment", int),
+    ("compute_mode", int),
     ("concurrent_kernels", bool),
     ("ecc_enabled", bool),
     ("pci_bus_id", int),
     ("pci_device_id", int),
     ("pci_domain_id", int),
     ("tcc_driver", bool),
-    ("async_engine_count", int),
-    ("unified_addressing", bool),
-    ("memory_bus_width", int),
+    ("memory_clock_rate", int),
+    ("global_memory_bus_width", int),
     ("l2_cache_size", int),
-    ("persisting_l2_cache_max_size", int),
-    ("max_threads_per_multi_processor", int),
-    ("stream_priorities_supported", bool),
+    ("max_threads_per_multiprocessor", int),
+    ("unified_addressing", bool),
+    ("compute_capability_major", int),
+    ("compute_capability_minor", int),
     ("global_l1_cache_supported", bool),
     ("local_l1_cache_supported", bool),
-    ("shared_mem_per_multiprocessor", int),
-    ("regs_per_multiprocessor", int),
+    ("max_shared_memory_per_multiprocessor", int),
+    ("max_registers_per_multiprocessor", int),
     ("managed_memory", bool),
-    ("is_multi_gpu_board", bool),
+    ("multi_gpu_board", bool),
     ("multi_gpu_board_group_id", int),
+    ("host_native_atomic_supported", bool),
+    ("single_to_double_precision_perf_ratio", int),
     ("pageable_memory_access", bool),
     ("concurrent_managed_access", bool),
     ("compute_preemption_supported", bool),
     ("can_use_host_pointer_for_registered_mem", bool),
-    ("cooperative_launch", bool),
+    ("max_shared_memory_per_block_optin", int),
     ("pageable_memory_access_uses_host_page_tables", bool),
     ("direct_managed_mem_access_from_host", bool),
-    ("access_policy_max_window_size", int),
-    ("reserved_shared_mem_per_block", int),
-    ("host_register_supported", bool),
+    ("virtual_memory_management_supported", bool),
+    ("handle_type_posix_file_descriptor_supported", bool),
+    ("handle_type_win32_handle_supported", bool),
+    ("handle_type_win32_kmt_handle_supported", bool),
+    ("max_blocks_per_multiprocessor", int),
+    ("generic_compression_supported", bool),
+    ("max_persisting_l2_cache_size", int),
+    ("max_access_policy_window_size", int),
+    ("gpu_direct_rdma_with_cuda_vmm_supported", bool),
+    ("reserved_shared_memory_per_block", int),
     ("sparse_cuda_array_supported", bool),
-    ("host_register_read_only_supported", bool),
-    ("timeline_semaphore_interop_supported", bool),
+    ("read_only_host_register_supported", bool),
     ("memory_pools_supported", bool),
     ("gpu_direct_rdma_supported", bool),
     ("gpu_direct_rdma_flush_writes_options", int),
     ("gpu_direct_rdma_writes_ordering", int),
-    ("memory_pool_supported_handle_types", int),
+    ("mempool_supported_handle_types", int),
     ("deferred_mapping_cuda_array_supported", bool),
-    ("ipc_event_supported", bool),
-    ("unified_function_pointers", bool),
-]
-
-cuda_12_properties = [
-    ("host_native_atomic_supported", bool),
-    ("luid", bytes),
-    ("luid_device_node_mask", int),
-    ("max_blocks_per_multi_processor", int),
-    ("shared_mem_per_block_optin", int),
-    ("cluster_launch", bool),
+    ("numa_config", int),
+    ("numa_id", int),
+    ("multicast_supported", bool),
 ]
 
 
-driver_ver = handle_return(driver.cuDriverGetVersion())
-if driver_ver >= 12000:
-    cuda_base_properties += cuda_12_properties
-
-
 @pytest.mark.parametrize("property_name, expected_type", cuda_base_properties)
 def test_device_property_types(property_name, expected_type):
     device = Device()

From 3e94c64b9efdb9b9e85fcd0bc8e549bfee2081cd Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Mon, 27 Jan 2025 12:35:55 -0800
Subject: [PATCH 09/10] add type hints and individual attribute docstrings

---
 cuda_core/cuda/core/experimental/_device.py | 678 +++++++++++++-------
 1 file changed, 453 insertions(+), 225 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index df1da741..e2894ecc 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -14,123 +14,11 @@
 _tls_lock = threading.Lock()
 
 
-# ruff: noqa
 class DeviceProperties:
     """
     A class to query various attributes of a CUDA device.
 
     Attributes are read-only and provide information about the device.
-
-    Attributes:
-        max_threads_per_block (int): Maximum number of threads per block.
-        max_block_dim_x (int): Maximum x-dimension of a block.
-        max_block_dim_y (int): Maximum y-dimension of a block.
-        max_block_dim_z (int): Maximum z-dimension of a block.
-        max_grid_dim_x (int): Maximum x-dimension of a grid.
-        max_grid_dim_y (int): Maximum y-dimension of a grid.
-        max_grid_dim_z (int): Maximum z-dimension of a grid.
-        max_shared_memory_per_block (int): Maximum amount of shared memory available to a thread block in bytes.
-        total_constant_memory (int): Memory available on device for __constant__ variables in a CUDA C kernel in bytes.
-        warp_size (int): Warp size in threads.
-        max_pitch (int): Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through cuMemAllocPitch().
-        maximum_texture1d_width (int): Maximum 1D texture width.
-        maximum_texture1d_linear_width (int): Maximum width for a 1D texture bound to linear memory.
-        maximum_texture1d_mipmapped_width (int): Maximum mipmapped 1D texture width.
-        maximum_texture2d_width (int): Maximum 2D texture width.
-        maximum_texture2d_height (int): Maximum 2D texture height.
-        maximum_texture2d_linear_width (int): Maximum width for a 2D texture bound to linear memory.
-        maximum_texture2d_linear_height (int): Maximum height for a 2D texture bound to linear memory.
-        maximum_texture2d_linear_pitch (int): Maximum pitch in bytes for a 2D texture bound to linear memory.
-        maximum_texture2d_mipmapped_width (int): Maximum mipmapped 2D texture width.
-        maximum_texture2d_mipmapped_height (int): Maximum mipmapped 2D texture height.
-        maximum_texture3d_width (int): Maximum 3D texture width.
-        maximum_texture3d_height (int): Maximum 3D texture height.
-        maximum_texture3d_depth (int): Maximum 3D texture depth.
-        maximum_texture3d_width_alternate (int): Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported.
-        maximum_texture3d_height_alternate (int): Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported.
-        maximum_texture3d_depth_alternate (int): Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported.
-        maximum_texturecubemap_width (int): Maximum cubemap texture width or height.
-        maximum_texture1d_layered_width (int): Maximum 1D layered texture width.
-        maximum_texture1d_layered_layers (int): Maximum layers in a 1D layered texture.
-        maximum_texture2d_layered_width (int): Maximum 2D layered texture width.
-        maximum_texture2d_layered_height (int): Maximum 2D layered texture height.
-        maximum_texture2d_layered_layers (int): Maximum layers in a 2D layered texture.
-        maximum_texturecubemap_layered_width (int): Maximum cubemap layered texture width or height.
-        maximum_texturecubemap_layered_layers (int): Maximum layers in a cubemap layered texture.
-        maximum_surface1d_width (int): Maximum 1D surface width.
-        maximum_surface2d_width (int): Maximum 2D surface width.
-        maximum_surface2d_height (int): Maximum 2D surface height.
-        maximum_surface3d_width (int): Maximum 3D surface width.
-        maximum_surface3d_height (int): Maximum 3D surface height.
-        maximum_surface3d_depth (int): Maximum 3D surface depth.
-        maximum_surface1d_layered_width (int): Maximum 1D layered surface width.
-        maximum_surface1d_layered_layers (int): Maximum layers in a 1D layered surface.
-        maximum_surface2d_layered_width (int): Maximum 2D layered surface width.
-        maximum_surface2d_layered_height (int): Maximum 2D layered surface height.
-        maximum_surface2d_layered_layers (int): Maximum layers in a 2D layered surface.
-        maximum_surfacecubemap_width (int): Maximum cubemap surface width.
-        maximum_surfacecubemap_layered_width (int): Maximum cubemap layered surface width.
-        maximum_surfacecubemap_layered_layers (int): Maximum layers in a cubemap layered surface.
-        max_registers_per_block (int): Maximum number of 32-bit registers available to a thread block.
-        clock_rate (int): The typical clock frequency in kilohertz.
-        texture_alignment (int): Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset applied to texture fetches.
-        texture_pitch_alignment (int): Pitch alignment requirement for 2D texture references bound to pitched memory.
-        gpu_overlap (bool): True if the device can concurrently copy memory between host and device while executing a kernel, False if not.
-        multiprocessor_count (int): Number of multiprocessors on the device.
-        kernel_exec_timeout (bool): True if there is a run time limit for kernels executed on the device, False if not.
-        integrated (bool): True if the device is integrated with the memory subsystem, False if not.
-        can_map_host_memory (bool): True if the device can map host memory into the CUDA address space, False if not.
-        compute_mode (int): Compute mode that device is currently in.
-        concurrent_kernels (bool): True if the device supports executing multiple kernels within the same context simultaneously, False if not.
-        ecc_enabled (bool): True if error correction is enabled on the device, False if error correction is disabled or not supported by the device.
-        pci_bus_id (int): PCI bus identifier of the device.
-        pci_device_id (int): PCI device (also known as slot) identifier of the device.
-        pci_domain_id (int): PCI domain identifier of the device.
-        tcc_driver (bool): True if the device is using a TCC driver, False if not.
-        memory_clock_rate (int): Peak memory clock frequency in kilohertz.
-        global_memory_bus_width (int): Global memory bus width in bits.
-        l2_cache_size (int): Size of L2 cache in bytes, 0 if the device doesn't have L2 cache.
-        max_threads_per_multiprocessor (int): Maximum resident threads per multiprocessor.
-        unified_addressing (bool): True if the device shares a unified address space with the host, False if not.
-        compute_capability_major (int): Major compute capability version number.
-        compute_capability_minor (int): Minor compute capability version number.
-        global_l1_cache_supported (bool): True if device supports caching globals in L1 cache, False if caching globals in L1 cache is not supported by the device.
-        local_l1_cache_supported (bool): True if device supports caching locals in L1 cache, False if caching locals in L1 cache is not supported by the device.
-        max_shared_memory_per_multiprocessor (int): Maximum amount of shared memory available to a multiprocessor in bytes.
-        max_registers_per_multiprocessor (int): Maximum number of 32-bit registers available to a multiprocessor.
-        managed_memory (bool): True if device supports allocating managed memory on this system, False if allocating managed memory is not supported by the device on this system.
-        multi_gpu_board (bool): True if device is on a multi-GPU board, False if not.
-        multi_gpu_board_group_id (int): Unique identifier for a group of devices associated with the same board.
-        host_native_atomic_supported (bool): True if Link between the device and the host supports native atomic operations, False if not.
-        single_to_double_precision_perf_ratio (int): Ratio of single precision performance (in floating-point operations per second) to double precision performance.
-        pageable_memory_access (bool): True if device supports coherently accessing pageable memory without calling cudaHostRegister on it, False if not.
-        concurrent_managed_access (bool): True if device can coherently access managed memory concurrently with the CPU, False if not.
-        compute_preemption_supported (bool): True if device supports Compute Preemption, False if not.
-        can_use_host_pointer_for_registered_mem (bool): True if device can access host registered memory at the same virtual address as the CPU, False if not.
-        max_shared_memory_per_block_optin (int): The maximum per block shared memory size supported on this device.
-        pageable_memory_access_uses_host_page_tables (bool): True if device accesses pageable memory via the host's page tables, False if not.
-        direct_managed_mem_access_from_host (bool): True if the host can directly access managed memory on the device without migration, False if not.
-        virtual_memory_management_supported (bool): True if device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap and related APIs, False if not.
-        handle_type_posix_file_descriptor_supported (bool): True if device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, False if not.
-        handle_type_win32_handle_supported (bool): True if device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, False if not.
-        handle_type_win32_kmt_handle_supported (bool): True if device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, False if not.
-        max_blocks_per_multiprocessor (int): Maximum number of thread blocks that can reside on a multiprocessor.
-        generic_compression_supported (bool): True if device supports compressible memory allocation via cuMemCreate, False if not.
-        max_persisting_l2_cache_size (int): Maximum L2 persisting lines capacity setting in bytes.
-        max_access_policy_window_size (int): Maximum value of CUaccessPolicyWindow::num_bytes.
-        gpu_direct_rdma_with_cuda_vmm_supported (bool): True if device supports specifying the GPUDirect RDMA flag with cuMemCreate, False if not.
-        reserved_shared_memory_per_block (int): Amount of shared memory per block reserved by CUDA driver in bytes.
-        sparse_cuda_array_supported (bool): True if device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, False if not.
-        read_only_host_register_supported (bool): True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not.
-        memory_pools_supported (bool): True if device supports using the cuMemAllocAsync and cuMemPool family of APIs, False if not.
-        gpu_direct_rdma_supported (bool): True if device supports GPUDirect RDMA APIs, False if not.
-        gpu_direct_rdma_flush_writes_options (int): The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the CUflushGPUDirectRDMAWritesOptions enum.
-        gpu_direct_rdma_writes_ordering (int): GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute.
-        mempool_supported_handle_types (int): Bitmask of handle types supported with mempool based IPC.
-        deferred_mapping_cuda_array_supported (bool): True if device supports deferred mapping CUDA arrays and CUDA mipmapped arrays, False if not.
-        numa_config (int): NUMA configuration of a device.
-        numa_id (int): NUMA node ID of the GPU memory.
-        multicast_supported (bool): True if device supports switch multicast and reduction operations, False if not.
     """
 
     def __init__(self):
@@ -144,7 +32,10 @@ def _init(handle):
         return self
 
     @property
-    def max_threads_per_block(self):
+    def max_threads_per_block(self) -> int:
+        """
+        int: Maximum number of threads per block.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, self._handle
@@ -152,43 +43,64 @@ def max_threads_per_block(self):
         )
 
     @property
-    def max_block_dim_x(self):
+    def max_block_dim_x(self) -> int:
+        """
+        int: Maximum x-dimension of a block.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, self._handle)
         )
 
     @property
-    def max_block_dim_y(self):
+    def max_block_dim_y(self) -> int:
+        """
+        int: Maximum y-dimension of a block.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, self._handle)
         )
 
     @property
-    def max_block_dim_z(self):
+    def max_block_dim_z(self) -> int:
+        """
+        int: Maximum z-dimension of a block.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, self._handle)
         )
 
     @property
-    def max_grid_dim_x(self):
+    def max_grid_dim_x(self) -> int:
+        """
+        int: Maximum x-dimension of a grid.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, self._handle)
         )
 
     @property
-    def max_grid_dim_y(self):
+    def max_grid_dim_y(self) -> int:
+        """
+        int: Maximum y-dimension of a grid.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, self._handle)
         )
 
     @property
-    def max_grid_dim_z(self):
+    def max_grid_dim_z(self) -> int:
+        """
+        int: Maximum z-dimension of a grid.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, self._handle)
         )
 
     @property
-    def max_shared_memory_per_block(self):
+    def max_shared_memory_per_block(self) -> int:
+        """
+        int: Maximum amount of shared memory available to a thread block in bytes.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, self._handle
@@ -196,7 +108,10 @@ def max_shared_memory_per_block(self):
         )
 
     @property
-    def total_constant_memory(self):
+    def total_constant_memory(self) -> int:
+        """
+        int: Memory available on device for __constant__ variables in a CUDA C kernel in bytes.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, self._handle
@@ -204,19 +119,29 @@ def total_constant_memory(self):
         )
 
     @property
-    def warp_size(self):
+    def warp_size(self) -> int:
+        """
+        int: Warp size in threads.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE, self._handle)
         )
 
     @property
-    def max_pitch(self):
+    def max_pitch(self) -> int:
+        """
+        int: Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated
+        through cuMemAllocPitch().
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH, self._handle)
         )
 
     @property
-    def maximum_texture1d_width(self):
+    def maximum_texture1d_width(self) -> int:
+        """
+        int: Maximum 1D texture width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, self._handle
@@ -224,7 +149,10 @@ def maximum_texture1d_width(self):
         )
 
     @property
-    def maximum_texture1d_linear_width(self):
+    def maximum_texture1d_linear_width(self) -> int:
+        """
+        int: Maximum width for a 1D texture bound to linear memory.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH, self._handle
@@ -232,7 +160,10 @@ def maximum_texture1d_linear_width(self):
         )
 
     @property
-    def maximum_texture1d_mipmapped_width(self):
+    def maximum_texture1d_mipmapped_width(self) -> int:
+        """
+        int: Maximum mipmapped 1D texture width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH, self._handle
@@ -240,7 +171,10 @@ def maximum_texture1d_mipmapped_width(self):
         )
 
     @property
-    def maximum_texture2d_width(self):
+    def maximum_texture2d_width(self) -> int:
+        """
+        int: Maximum 2D texture width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, self._handle
@@ -248,7 +182,10 @@ def maximum_texture2d_width(self):
         )
 
     @property
-    def maximum_texture2d_height(self):
+    def maximum_texture2d_height(self) -> int:
+        """
+        int: Maximum 2D texture height.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, self._handle
@@ -256,7 +193,10 @@ def maximum_texture2d_height(self):
         )
 
     @property
-    def maximum_texture2d_linear_width(self):
+    def maximum_texture2d_linear_width(self) -> int:
+        """
+        int: Maximum width for a 2D texture bound to linear memory.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, self._handle
@@ -264,7 +204,10 @@ def maximum_texture2d_linear_width(self):
         )
 
     @property
-    def maximum_texture2d_linear_height(self):
+    def maximum_texture2d_linear_height(self) -> int:
+        """
+        int: Maximum height for a 2D texture bound to linear memory.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, self._handle
@@ -272,7 +215,10 @@ def maximum_texture2d_linear_height(self):
         )
 
     @property
-    def maximum_texture2d_linear_pitch(self):
+    def maximum_texture2d_linear_pitch(self) -> int:
+        """
+        int: Maximum pitch in bytes for a 2D texture bound to linear memory.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, self._handle
@@ -280,7 +226,10 @@ def maximum_texture2d_linear_pitch(self):
         )
 
     @property
-    def maximum_texture2d_mipmapped_width(self):
+    def maximum_texture2d_mipmapped_width(self) -> int:
+        """
+        int: Maximum mipmapped 2D texture width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH, self._handle
@@ -288,7 +237,10 @@ def maximum_texture2d_mipmapped_width(self):
         )
 
     @property
-    def maximum_texture2d_mipmapped_height(self):
+    def maximum_texture2d_mipmapped_height(self) -> int:
+        """
+        int: Maximum mipmapped 2D texture height.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT, self._handle
@@ -296,7 +248,10 @@ def maximum_texture2d_mipmapped_height(self):
         )
 
     @property
-    def maximum_texture3d_width(self):
+    def maximum_texture3d_width(self) -> int:
+        """
+        int: Maximum 3D texture width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, self._handle
@@ -304,7 +259,10 @@ def maximum_texture3d_width(self):
         )
 
     @property
-    def maximum_texture3d_height(self):
+    def maximum_texture3d_height(self) -> int:
+        """
+        int: Maximum 3D texture height.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, self._handle
@@ -312,7 +270,10 @@ def maximum_texture3d_height(self):
         )
 
     @property
-    def maximum_texture3d_depth(self):
+    def maximum_texture3d_depth(self) -> int:
+        """
+        int: Maximum 3D texture depth.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, self._handle
@@ -320,7 +281,10 @@ def maximum_texture3d_depth(self):
         )
 
     @property
-    def maximum_texture3d_width_alternate(self):
+    def maximum_texture3d_width_alternate(self) -> int:
+        """
+        int: Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE, self._handle
@@ -328,7 +292,10 @@ def maximum_texture3d_width_alternate(self):
         )
 
     @property
-    def maximum_texture3d_height_alternate(self):
+    def maximum_texture3d_height_alternate(self) -> int:
+        """
+        int: Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE, self._handle
@@ -336,7 +303,10 @@ def maximum_texture3d_height_alternate(self):
         )
 
     @property
-    def maximum_texture3d_depth_alternate(self):
+    def maximum_texture3d_depth_alternate(self) -> int:
+        """
+        int: Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE, self._handle
@@ -344,7 +314,10 @@ def maximum_texture3d_depth_alternate(self):
         )
 
     @property
-    def maximum_texturecubemap_width(self):
+    def maximum_texturecubemap_width(self) -> int:
+        """
+        int: Maximum cubemap texture width or height.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH, self._handle
@@ -352,7 +325,10 @@ def maximum_texturecubemap_width(self):
         )
 
     @property
-    def maximum_texture1d_layered_width(self):
+    def maximum_texture1d_layered_width(self) -> int:
+        """
+        int: Maximum 1D layered texture width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH, self._handle
@@ -360,7 +336,10 @@ def maximum_texture1d_layered_width(self):
         )
 
     @property
-    def maximum_texture1d_layered_layers(self):
+    def maximum_texture1d_layered_layers(self) -> int:
+        """
+        int: Maximum layers in a 1D layered texture.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS, self._handle
@@ -368,7 +347,10 @@ def maximum_texture1d_layered_layers(self):
         )
 
     @property
-    def maximum_texture2d_layered_width(self):
+    def maximum_texture2d_layered_width(self) -> int:
+        """
+        int: Maximum 2D layered texture width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH, self._handle
@@ -376,7 +358,10 @@ def maximum_texture2d_layered_width(self):
         )
 
     @property
-    def maximum_texture2d_layered_height(self):
+    def maximum_texture2d_layered_height(self) -> int:
+        """
+        int: Maximum 2D layered texture height.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, self._handle
@@ -384,7 +369,10 @@ def maximum_texture2d_layered_height(self):
         )
 
     @property
-    def maximum_texture2d_layered_layers(self):
+    def maximum_texture2d_layered_layers(self) -> int:
+        """
+        int: Maximum layers in a 2D layered texture.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS, self._handle
@@ -392,7 +380,10 @@ def maximum_texture2d_layered_layers(self):
         )
 
     @property
-    def maximum_texturecubemap_layered_width(self):
+    def maximum_texturecubemap_layered_width(self) -> int:
+        """
+        int: Maximum cubemap layered texture width or height.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH, self._handle
@@ -400,7 +391,10 @@ def maximum_texturecubemap_layered_width(self):
         )
 
     @property
-    def maximum_texturecubemap_layered_layers(self):
+    def maximum_texturecubemap_layered_layers(self) -> int:
+        """
+        int: Maximum layers in a cubemap layered texture.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS, self._handle
@@ -408,7 +402,10 @@ def maximum_texturecubemap_layered_layers(self):
         )
 
     @property
-    def maximum_surface1d_width(self):
+    def maximum_surface1d_width(self) -> int:
+        """
+        int: Maximum 1D surface width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, self._handle
@@ -416,7 +413,10 @@ def maximum_surface1d_width(self):
         )
 
     @property
-    def maximum_surface2d_width(self):
+    def maximum_surface2d_width(self) -> int:
+        """
+        int: Maximum 2D surface width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, self._handle
@@ -424,7 +424,10 @@ def maximum_surface2d_width(self):
         )
 
     @property
-    def maximum_surface2d_height(self):
+    def maximum_surface2d_height(self) -> int:
+        """
+        int: Maximum 2D surface height.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, self._handle
@@ -432,7 +435,10 @@ def maximum_surface2d_height(self):
         )
 
     @property
-    def maximum_surface3d_width(self):
+    def maximum_surface3d_width(self) -> int:
+        """
+        int: Maximum 3D surface width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, self._handle
@@ -440,7 +446,10 @@ def maximum_surface3d_width(self):
         )
 
     @property
-    def maximum_surface3d_height(self):
+    def maximum_surface3d_height(self) -> int:
+        """
+        int: Maximum 3D surface height.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, self._handle
@@ -448,7 +457,10 @@ def maximum_surface3d_height(self):
         )
 
     @property
-    def maximum_surface3d_depth(self):
+    def maximum_surface3d_depth(self) -> int:
+        """
+        int: Maximum 3D surface depth.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, self._handle
@@ -456,7 +468,10 @@ def maximum_surface3d_depth(self):
         )
 
     @property
-    def maximum_surface1d_layered_width(self):
+    def maximum_surface1d_layered_width(self) -> int:
+        """
+        int: Maximum 1D layered surface width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH, self._handle
@@ -464,7 +479,10 @@ def maximum_surface1d_layered_width(self):
         )
 
     @property
-    def maximum_surface1d_layered_layers(self):
+    def maximum_surface1d_layered_layers(self) -> int:
+        """
+        int: Maximum layers in a 1D layered surface.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS, self._handle
@@ -472,7 +490,10 @@ def maximum_surface1d_layered_layers(self):
         )
 
     @property
-    def maximum_surface2d_layered_width(self):
+    def maximum_surface2d_layered_width(self) -> int:
+        """
+        int: Maximum 2D layered surface width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH, self._handle
@@ -480,7 +501,10 @@ def maximum_surface2d_layered_width(self):
         )
 
     @property
-    def maximum_surface2d_layered_height(self):
+    def maximum_surface2d_layered_height(self) -> int:
+        """
+        int: Maximum 2D layered surface height.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT, self._handle
@@ -488,7 +512,10 @@ def maximum_surface2d_layered_height(self):
         )
 
     @property
-    def maximum_surface2d_layered_layers(self):
+    def maximum_surface2d_layered_layers(self) -> int:
+        """
+        int: Maximum layers in a 2D layered surface.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS, self._handle
@@ -496,7 +523,10 @@ def maximum_surface2d_layered_layers(self):
         )
 
     @property
-    def maximum_surfacecubemap_width(self):
+    def maximum_surfacecubemap_width(self) -> int:
+        """
+        int: Maximum cubemap surface width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH, self._handle
@@ -504,7 +534,10 @@ def maximum_surfacecubemap_width(self):
         )
 
     @property
-    def maximum_surfacecubemap_layered_width(self):
+    def maximum_surfacecubemap_layered_width(self) -> int:
+        """
+        int: Maximum cubemap layered surface width.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH, self._handle
@@ -512,7 +545,10 @@ def maximum_surfacecubemap_layered_width(self):
         )
 
     @property
-    def maximum_surfacecubemap_layered_layers(self):
+    def maximum_surfacecubemap_layered_layers(self) -> int:
+        """
+        int: Maximum layers in a cubemap layered surface.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS, self._handle
@@ -520,7 +556,10 @@ def maximum_surfacecubemap_layered_layers(self):
         )
 
     @property
-    def max_registers_per_block(self):
+    def max_registers_per_block(self) -> int:
+        """
+        int: Maximum number of 32-bit registers available to a thread block.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, self._handle
@@ -528,19 +567,29 @@ def max_registers_per_block(self):
         )
 
     @property
-    def clock_rate(self):
+    def clock_rate(self) -> int:
+        """
+        int: The typical clock frequency in kilohertz.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, self._handle)
         )
 
     @property
-    def texture_alignment(self):
+    def texture_alignment(self) -> int:
+        """
+        int: Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset applied
+        to texture fetches.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, self._handle)
         )
 
     @property
-    def texture_pitch_alignment(self):
+    def texture_pitch_alignment(self) -> int:
+        """
+        int: Pitch alignment requirement for 2D texture references bound to pitched memory.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, self._handle
@@ -548,7 +597,11 @@ def texture_pitch_alignment(self):
         )
 
     @property
-    def gpu_overlap(self):
+    def gpu_overlap(self) -> bool:
+        """
+        bool: True if the device can concurrently copy memory between host and device while executing a kernel, False
+        if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, self._handle)
@@ -556,7 +609,10 @@ def gpu_overlap(self):
         )
 
     @property
-    def multiprocessor_count(self):
+    def multiprocessor_count(self) -> int:
+        """
+        int: Number of multiprocessors on the device.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, self._handle
@@ -564,7 +620,10 @@ def multiprocessor_count(self):
         )
 
     @property
-    def kernel_exec_timeout(self):
+    def kernel_exec_timeout(self) -> bool:
+        """
+        bool: True if there is a run time limit for kernels executed on the device, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -574,7 +633,10 @@ def kernel_exec_timeout(self):
         )
 
     @property
-    def integrated(self):
+    def integrated(self) -> bool:
+        """
+        bool: True if the device is integrated with the memory subsystem, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED, self._handle)
@@ -582,7 +644,10 @@ def integrated(self):
         )
 
     @property
-    def can_map_host_memory(self):
+    def can_map_host_memory(self) -> bool:
+        """
+        True if the device can map host memory into the CUDA address space, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -592,13 +657,19 @@ def can_map_host_memory(self):
         )
 
     @property
-    def compute_mode(self):
+    def compute_mode(self) -> int:
+        """
+        Compute mode that device is currently in.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, self._handle)
         )
 
     @property
-    def concurrent_kernels(self):
+    def concurrent_kernels(self) -> bool:
+        """
+        True if the device supports executing multiple kernels within the same context simultaneously, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -608,7 +679,11 @@ def concurrent_kernels(self):
         )
 
     @property
-    def ecc_enabled(self):
+    def ecc_enabled(self) -> bool:
+        """
+        True if error correction is enabled on the device, False if error correction is disabled or not supported by
+        the device.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED, self._handle)
@@ -616,25 +691,37 @@ def ecc_enabled(self):
         )
 
     @property
-    def pci_bus_id(self):
+    def pci_bus_id(self) -> int:
+        """
+        PCI bus identifier of the device.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, self._handle)
         )
 
     @property
-    def pci_device_id(self):
+    def pci_device_id(self) -> int:
+        """
+        PCI device (also known as slot) identifier of the device.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, self._handle)
         )
 
     @property
-    def pci_domain_id(self):
+    def pci_domain_id(self) -> int:
+        """
+        PCI domain identifier of the device.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, self._handle)
         )
 
     @property
-    def tcc_driver(self):
+    def tcc_driver(self) -> bool:
+        """
+        True if the device is using a TCC driver, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER, self._handle)
@@ -642,13 +729,19 @@ def tcc_driver(self):
         )
 
     @property
-    def memory_clock_rate(self):
+    def memory_clock_rate(self) -> int:
+        """
+        Peak memory clock frequency in kilohertz.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, self._handle)
         )
 
     @property
-    def global_memory_bus_width(self):
+    def global_memory_bus_width(self) -> int:
+        """
+        Global memory bus width in bits.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, self._handle
@@ -656,13 +749,19 @@ def global_memory_bus_width(self):
         )
 
     @property
-    def l2_cache_size(self):
+    def l2_cache_size(self) -> int:
+        """
+        Size of L2 cache in bytes, 0 if the device doesn't have L2 cache.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, self._handle)
         )
 
     @property
-    def max_threads_per_multiprocessor(self):
+    def max_threads_per_multiprocessor(self) -> int:
+        """
+        Maximum resident threads per multiprocessor.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, self._handle
@@ -670,7 +769,10 @@ def max_threads_per_multiprocessor(self):
         )
 
     @property
-    def unified_addressing(self):
+    def unified_addressing(self) -> bool:
+        """
+        True if the device shares a unified address space with the host, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -680,7 +782,10 @@ def unified_addressing(self):
         )
 
     @property
-    def compute_capability_major(self):
+    def compute_capability_major(self) -> int:
+        """
+        Major compute capability version number.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, self._handle
@@ -688,7 +793,10 @@ def compute_capability_major(self):
         )
 
     @property
-    def compute_capability_minor(self):
+    def compute_capability_minor(self) -> int:
+        """
+        Minor compute capability version number.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, self._handle
@@ -696,7 +804,11 @@ def compute_capability_minor(self):
         )
 
     @property
-    def global_l1_cache_supported(self):
+    def global_l1_cache_supported(self) -> bool:
+        """
+        True if device supports caching globals in L1 cache, False if caching globals in L1 cache is not supported
+        by the device.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -706,7 +818,11 @@ def global_l1_cache_supported(self):
         )
 
     @property
-    def local_l1_cache_supported(self):
+    def local_l1_cache_supported(self) -> bool:
+        """
+        True if device supports caching locals in L1 cache, False if caching locals in L1 cache is not supported
+        by the device.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -716,7 +832,10 @@ def local_l1_cache_supported(self):
         )
 
     @property
-    def max_shared_memory_per_multiprocessor(self):
+    def max_shared_memory_per_multiprocessor(self) -> int:
+        """
+        Maximum amount of shared memory available to a multiprocessor in bytes.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, self._handle
@@ -724,7 +843,10 @@ def max_shared_memory_per_multiprocessor(self):
         )
 
     @property
-    def max_registers_per_multiprocessor(self):
+    def max_registers_per_multiprocessor(self) -> int:
+        """
+        Maximum number of 32-bit registers available to a multiprocessor.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, self._handle
@@ -732,7 +854,11 @@ def max_registers_per_multiprocessor(self):
         )
 
     @property
-    def managed_memory(self):
+    def managed_memory(self) -> bool:
+        """
+        True if device supports allocating managed memory on this system, False if allocating managed memory is not
+        supported by the device on this system.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, self._handle)
@@ -740,7 +866,10 @@ def managed_memory(self):
         )
 
     @property
-    def multi_gpu_board(self):
+    def multi_gpu_board(self) -> bool:
+        """
+        True if device is on a multi-GPU board, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, self._handle)
@@ -748,7 +877,10 @@ def multi_gpu_board(self):
         )
 
     @property
-    def multi_gpu_board_group_id(self):
+    def multi_gpu_board_group_id(self) -> int:
+        """
+        Unique identifier for a group of devices associated with the same board.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID, self._handle
@@ -756,7 +888,10 @@ def multi_gpu_board_group_id(self):
         )
 
     @property
-    def host_native_atomic_supported(self):
+    def host_native_atomic_supported(self) -> bool:
+        """
+        True if Link between the device and the host supports native atomic operations, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -766,7 +901,11 @@ def host_native_atomic_supported(self):
         )
 
     @property
-    def single_to_double_precision_perf_ratio(self):
+    def single_to_double_precision_perf_ratio(self) -> int:
+        """
+        Ratio of single precision performance (in floating-point operations per second) to double precision
+        performance.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, self._handle
@@ -774,7 +913,11 @@ def single_to_double_precision_perf_ratio(self):
         )
 
     @property
-    def pageable_memory_access(self):
+    def pageable_memory_access(self) -> bool:
+        """
+        True if device supports coherently accessing pageable memory without calling cudaHostRegister on it,
+        False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -784,7 +927,10 @@ def pageable_memory_access(self):
         )
 
     @property
-    def concurrent_managed_access(self):
+    def concurrent_managed_access(self) -> bool:
+        """
+        True if device can coherently access managed memory concurrently with the CPU, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -794,7 +940,10 @@ def concurrent_managed_access(self):
         )
 
     @property
-    def compute_preemption_supported(self):
+    def compute_preemption_supported(self) -> bool:
+        """
+        True if device supports Compute Preemption, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -804,7 +953,10 @@ def compute_preemption_supported(self):
         )
 
     @property
-    def can_use_host_pointer_for_registered_mem(self):
+    def can_use_host_pointer_for_registered_mem(self) -> bool:
+        """
+        True if device can access host registered memory at the same virtual address as the CPU, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -814,7 +966,10 @@ def can_use_host_pointer_for_registered_mem(self):
         )
 
     @property
-    def max_shared_memory_per_block_optin(self):
+    def max_shared_memory_per_block_optin(self) -> int:
+        """
+        The maximum per block shared memory size supported on this device.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, self._handle
@@ -822,7 +977,10 @@ def max_shared_memory_per_block_optin(self):
         )
 
     @property
-    def pageable_memory_access_uses_host_page_tables(self):
+    def pageable_memory_access_uses_host_page_tables(self) -> bool:
+        """
+        True if device accesses pageable memory via the host's page tables, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -833,7 +991,10 @@ def pageable_memory_access_uses_host_page_tables(self):
         )
 
     @property
-    def direct_managed_mem_access_from_host(self):
+    def direct_managed_mem_access_from_host(self) -> bool:
+        """
+        True if the host can directly access managed memory on the device without migration, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -843,7 +1004,11 @@ def direct_managed_mem_access_from_host(self):
         )
 
     @property
-    def virtual_memory_management_supported(self):
+    def virtual_memory_management_supported(self) -> bool:
+        """
+        True if device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap
+        and related APIs, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -853,7 +1018,11 @@ def virtual_memory_management_supported(self):
         )
 
     @property
-    def handle_type_posix_file_descriptor_supported(self):
+    def handle_type_posix_file_descriptor_supported(self) -> bool:
+        """
+        True if device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle,
+        False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -864,7 +1033,11 @@ def handle_type_posix_file_descriptor_supported(self):
         )
 
     @property
-    def handle_type_win32_handle_supported(self):
+    def handle_type_win32_handle_supported(self) -> bool:
+        """
+        True if device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle,
+        False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -874,7 +1047,11 @@ def handle_type_win32_handle_supported(self):
         )
 
     @property
-    def handle_type_win32_kmt_handle_supported(self):
+    def handle_type_win32_kmt_handle_supported(self) -> bool:
+        """
+        True if device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle,
+        False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -884,7 +1061,10 @@ def handle_type_win32_kmt_handle_supported(self):
         )
 
     @property
-    def max_blocks_per_multiprocessor(self):
+    def max_blocks_per_multiprocessor(self) -> int:
+        """
+        Maximum number of thread blocks that can reside on a multiprocessor.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR, self._handle
@@ -892,7 +1072,10 @@ def max_blocks_per_multiprocessor(self):
         )
 
     @property
-    def generic_compression_supported(self):
+    def generic_compression_supported(self) -> bool:
+        """
+        True if device supports compressible memory allocation via cuMemCreate, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -902,7 +1085,10 @@ def generic_compression_supported(self):
         )
 
     @property
-    def max_persisting_l2_cache_size(self):
+    def max_persisting_l2_cache_size(self) -> int:
+        """
+        Maximum L2 persisting lines capacity setting in bytes.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE, self._handle
@@ -910,7 +1096,10 @@ def max_persisting_l2_cache_size(self):
         )
 
     @property
-    def max_access_policy_window_size(self):
+    def max_access_policy_window_size(self) -> int:
+        """
+        Maximum value of CUaccessPolicyWindow::num_bytes.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE, self._handle
@@ -918,7 +1107,10 @@ def max_access_policy_window_size(self):
         )
 
     @property
-    def gpu_direct_rdma_with_cuda_vmm_supported(self):
+    def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool:
+        """
+        True if device supports specifying the GPUDirect RDMA flag with cuMemCreate, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -928,7 +1120,10 @@ def gpu_direct_rdma_with_cuda_vmm_supported(self):
         )
 
     @property
-    def reserved_shared_memory_per_block(self):
+    def reserved_shared_memory_per_block(self) -> int:
+        """
+        Amount of shared memory per block reserved by CUDA driver in bytes.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK, self._handle
@@ -936,7 +1131,10 @@ def reserved_shared_memory_per_block(self):
         )
 
     @property
-    def sparse_cuda_array_supported(self):
+    def sparse_cuda_array_supported(self) -> bool:
+        """
+        True if device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -946,7 +1144,11 @@ def sparse_cuda_array_supported(self):
         )
 
     @property
-    def read_only_host_register_supported(self):
+    def read_only_host_register_supported(self) -> bool:
+        """
+        True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register
+        memory that must be mapped as read-only to the GPU, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -956,7 +1158,10 @@ def read_only_host_register_supported(self):
         )
 
     @property
-    def memory_pools_supported(self):
+    def memory_pools_supported(self) -> bool:
+        """
+        True if device supports using the cuMemAllocAsync and cuMemPool family of APIs, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -966,7 +1171,10 @@ def memory_pools_supported(self):
         )
 
     @property
-    def gpu_direct_rdma_supported(self):
+    def gpu_direct_rdma_supported(self) -> bool:
+        """
+        True if device supports GPUDirect RDMA APIs, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -976,7 +1184,11 @@ def gpu_direct_rdma_supported(self):
         )
 
     @property
-    def gpu_direct_rdma_flush_writes_options(self):
+    def gpu_direct_rdma_flush_writes_options(self) -> int:
+        """
+        The returned attribute shall be interpreted as a bitmask, where the individual bits are described by
+        the CUflushGPUDirectRDMAWritesOptions enum.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, self._handle
@@ -984,7 +1196,11 @@ def gpu_direct_rdma_flush_writes_options(self):
         )
 
     @property
-    def gpu_direct_rdma_writes_ordering(self):
+    def gpu_direct_rdma_writes_ordering(self) -> int:
+        """
+        GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated
+        by the returned attribute.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, self._handle
@@ -992,7 +1208,10 @@ def gpu_direct_rdma_writes_ordering(self):
         )
 
     @property
-    def mempool_supported_handle_types(self):
+    def mempool_supported_handle_types(self) -> int:
+        """
+        Bitmask of handle types supported with mempool based IPC.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(
                 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES, self._handle
@@ -1000,7 +1219,10 @@ def mempool_supported_handle_types(self):
         )
 
     @property
-    def deferred_mapping_cuda_array_supported(self):
+    def deferred_mapping_cuda_array_supported(self) -> bool:
+        """
+        True if device supports deferred mapping CUDA arrays and CUDA mipmapped arrays, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -1010,19 +1232,28 @@ def deferred_mapping_cuda_array_supported(self):
         )
 
     @property
-    def numa_config(self):
+    def numa_config(self) -> int:
+        """
+        NUMA configuration of a device.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG, self._handle)
         )
 
     @property
-    def numa_id(self):
+    def numa_id(self) -> int:
+        """
+        NUMA node ID of the GPU memory.
+        """
         return handle_return(
             driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID, self._handle)
         )
 
     @property
-    def multicast_supported(self):
+    def multicast_supported(self) -> bool:
+        """
+        True if device supports switch multicast and reduction operations, False if not.
+        """
         return bool(
             handle_return(
                 driver.cuDeviceGetAttribute(
@@ -1032,9 +1263,6 @@ def multicast_supported(self):
         )
 
 
-# ruff: enable
-
-
 class Device:
     """Represent a GPU and act as an entry point for cuda.core features.
 
@@ -1147,7 +1375,7 @@ def name(self) -> str:
 
     @property
     def properties(self) -> DeviceProperties:
-        """Return information about the compute-device."""
+        """Return a :obj:`~_device.DeviceProperties` class with information about the device."""
         if self._properties is None:
             self._properties = DeviceProperties._init(self._id)
 

From 9f9512592f2228b048bf5ff49f7ae6a1e2b4b71f Mon Sep 17 00:00:00 2001
From: ksimpson <ksimpson@nvidia.com>
Date: Tue, 28 Jan 2025 09:37:12 -0800
Subject: [PATCH 10/10] address comments

---
 cuda_core/cuda/core/experimental/_device.py | 696 ++++----------------
 cuda_core/tests/test_device.py              |   8 +
 2 files changed, 144 insertions(+), 560 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index e2894ecc..cfbda8ef 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -24,108 +24,85 @@ class DeviceProperties:
     def __init__(self):
         raise RuntimeError("DeviceProperties should not be instantiated directly")
 
-    slots = "_handle"
+    __slots__ = "_handle"
 
     def _init(handle):
         self = DeviceProperties.__new__(DeviceProperties)
         self._handle = handle
         return self
 
+    def _get_attribute(self, dev_attr):
+        return handle_return(driver.cuDeviceGetAttribute(dev_attr, self._handle))
+
     @property
     def max_threads_per_block(self) -> int:
         """
         int: Maximum number of threads per block.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK)
 
     @property
     def max_block_dim_x(self) -> int:
         """
         int: Maximum x-dimension of a block.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X)
 
     @property
     def max_block_dim_y(self) -> int:
         """
         int: Maximum y-dimension of a block.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y)
 
     @property
     def max_block_dim_z(self) -> int:
         """
         int: Maximum z-dimension of a block.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z)
 
     @property
     def max_grid_dim_x(self) -> int:
         """
         int: Maximum x-dimension of a grid.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X)
 
     @property
     def max_grid_dim_y(self) -> int:
         """
         int: Maximum y-dimension of a grid.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y)
 
     @property
     def max_grid_dim_z(self) -> int:
         """
         int: Maximum z-dimension of a grid.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z)
 
     @property
     def max_shared_memory_per_block(self) -> int:
         """
         int: Maximum amount of shared memory available to a thread block in bytes.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK)
 
     @property
     def total_constant_memory(self) -> int:
         """
         int: Memory available on device for __constant__ variables in a CUDA C kernel in bytes.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY)
 
     @property
     def warp_size(self) -> int:
         """
         int: Warp size in threads.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE)
 
     @property
     def max_pitch(self) -> int:
@@ -133,675 +110,438 @@ def max_pitch(self) -> int:
         int: Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated
         through cuMemAllocPitch().
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH)
 
     @property
     def maximum_texture1d_width(self) -> int:
         """
         int: Maximum 1D texture width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH)
 
     @property
     def maximum_texture1d_linear_width(self) -> int:
         """
         int: Maximum width for a 1D texture bound to linear memory.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH)
 
     @property
     def maximum_texture1d_mipmapped_width(self) -> int:
         """
         int: Maximum mipmapped 1D texture width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH)
 
     @property
     def maximum_texture2d_width(self) -> int:
         """
         int: Maximum 2D texture width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH)
 
     @property
     def maximum_texture2d_height(self) -> int:
         """
         int: Maximum 2D texture height.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT)
 
     @property
     def maximum_texture2d_linear_width(self) -> int:
         """
         int: Maximum width for a 2D texture bound to linear memory.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH)
 
     @property
     def maximum_texture2d_linear_height(self) -> int:
         """
         int: Maximum height for a 2D texture bound to linear memory.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT)
 
     @property
     def maximum_texture2d_linear_pitch(self) -> int:
         """
         int: Maximum pitch in bytes for a 2D texture bound to linear memory.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH)
 
     @property
     def maximum_texture2d_mipmapped_width(self) -> int:
         """
         int: Maximum mipmapped 2D texture width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH)
 
     @property
     def maximum_texture2d_mipmapped_height(self) -> int:
         """
         int: Maximum mipmapped 2D texture height.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT)
 
     @property
     def maximum_texture3d_width(self) -> int:
         """
         int: Maximum 3D texture width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH)
 
     @property
     def maximum_texture3d_height(self) -> int:
         """
         int: Maximum 3D texture height.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT)
 
     @property
     def maximum_texture3d_depth(self) -> int:
         """
         int: Maximum 3D texture depth.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH)
 
     @property
     def maximum_texture3d_width_alternate(self) -> int:
         """
         int: Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE)
 
     @property
     def maximum_texture3d_height_alternate(self) -> int:
         """
         int: Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE)
 
     @property
     def maximum_texture3d_depth_alternate(self) -> int:
         """
         int: Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE)
 
     @property
     def maximum_texturecubemap_width(self) -> int:
         """
         int: Maximum cubemap texture width or height.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH)
 
     @property
     def maximum_texture1d_layered_width(self) -> int:
         """
         int: Maximum 1D layered texture width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH)
 
     @property
     def maximum_texture1d_layered_layers(self) -> int:
         """
         int: Maximum layers in a 1D layered texture.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS)
 
     @property
     def maximum_texture2d_layered_width(self) -> int:
         """
         int: Maximum 2D layered texture width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH)
 
     @property
     def maximum_texture2d_layered_height(self) -> int:
         """
         int: Maximum 2D layered texture height.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT)
 
     @property
     def maximum_texture2d_layered_layers(self) -> int:
         """
         int: Maximum layers in a 2D layered texture.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS)
 
     @property
     def maximum_texturecubemap_layered_width(self) -> int:
         """
         int: Maximum cubemap layered texture width or height.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH)
 
     @property
     def maximum_texturecubemap_layered_layers(self) -> int:
         """
         int: Maximum layers in a cubemap layered texture.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS)
 
     @property
     def maximum_surface1d_width(self) -> int:
         """
         int: Maximum 1D surface width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH)
 
     @property
     def maximum_surface2d_width(self) -> int:
         """
         int: Maximum 2D surface width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH)
 
     @property
     def maximum_surface2d_height(self) -> int:
         """
         int: Maximum 2D surface height.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT)
 
     @property
     def maximum_surface3d_width(self) -> int:
         """
         int: Maximum 3D surface width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH)
 
     @property
     def maximum_surface3d_height(self) -> int:
         """
         int: Maximum 3D surface height.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT)
 
     @property
     def maximum_surface3d_depth(self) -> int:
         """
         int: Maximum 3D surface depth.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH)
 
     @property
     def maximum_surface1d_layered_width(self) -> int:
         """
         int: Maximum 1D layered surface width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH)
 
     @property
     def maximum_surface1d_layered_layers(self) -> int:
         """
         int: Maximum layers in a 1D layered surface.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS)
 
     @property
     def maximum_surface2d_layered_width(self) -> int:
         """
         int: Maximum 2D layered surface width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH)
 
     @property
     def maximum_surface2d_layered_height(self) -> int:
         """
         int: Maximum 2D layered surface height.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT)
 
     @property
     def maximum_surface2d_layered_layers(self) -> int:
         """
         int: Maximum layers in a 2D layered surface.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS)
 
     @property
     def maximum_surfacecubemap_width(self) -> int:
         """
         int: Maximum cubemap surface width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH)
 
     @property
     def maximum_surfacecubemap_layered_width(self) -> int:
         """
         int: Maximum cubemap layered surface width.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH)
 
     @property
     def maximum_surfacecubemap_layered_layers(self) -> int:
         """
         int: Maximum layers in a cubemap layered surface.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS)
 
     @property
     def max_registers_per_block(self) -> int:
         """
         int: Maximum number of 32-bit registers available to a thread block.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK)
 
     @property
     def clock_rate(self) -> int:
         """
         int: The typical clock frequency in kilohertz.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE)
 
     @property
     def texture_alignment(self) -> int:
         """
-        int: Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset applied
-        to texture fetches.
+        int: Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset
+        applied to texture fetches.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT)
 
     @property
     def texture_pitch_alignment(self) -> int:
         """
         int: Pitch alignment requirement for 2D texture references bound to pitched memory.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT)
 
     @property
     def gpu_overlap(self) -> bool:
         """
-        bool: True if the device can concurrently copy memory between host and device while executing a kernel, False
-        if not.
+        bool: True if the device can concurrently copy memory between host and device while executing a kernel,
+        False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, self._handle)
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP))
 
     @property
     def multiprocessor_count(self) -> int:
         """
         int: Number of multiprocessors on the device.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT)
 
     @property
     def kernel_exec_timeout(self) -> bool:
         """
         bool: True if there is a run time limit for kernels executed on the device, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT))
 
     @property
     def integrated(self) -> bool:
         """
         bool: True if the device is integrated with the memory subsystem, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED, self._handle)
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED))
 
     @property
     def can_map_host_memory(self) -> bool:
         """
-        True if the device can map host memory into the CUDA address space, False if not.
+        bool: True if the device can map host memory into the CUDA address space, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY))
 
     @property
     def compute_mode(self) -> int:
         """
-        Compute mode that device is currently in.
+        int: Compute mode that device is currently in.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE)
 
     @property
     def concurrent_kernels(self) -> bool:
         """
-        True if the device supports executing multiple kernels within the same context simultaneously, False if not.
+        bool: True if the device supports executing multiple kernels within the same context simultaneously,
+        False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS))
 
     @property
     def ecc_enabled(self) -> bool:
         """
-        True if error correction is enabled on the device, False if error correction is disabled or not supported by
-        the device.
+        bool: True if error correction is enabled on the device, False if error correction is disabled or not
+        supported by the device.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED, self._handle)
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED))
 
     @property
     def pci_bus_id(self) -> int:
         """
-        PCI bus identifier of the device.
+        int: PCI bus identifier of the device.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID)
 
     @property
     def pci_device_id(self) -> int:
         """
-        PCI device (also known as slot) identifier of the device.
+        int: PCI device (also known as slot) identifier of the device.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID)
 
     @property
     def pci_domain_id(self) -> int:
         """
-        PCI domain identifier of the device.
+        int: PCI domain identifier of the device.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID)
 
     @property
     def tcc_driver(self) -> bool:
         """
-        True if the device is using a TCC driver, False if not.
+        bool: True if the device is using a TCC driver, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER, self._handle)
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER))
 
     @property
     def memory_clock_rate(self) -> int:
         """
-        Peak memory clock frequency in kilohertz.
+        int: Peak memory clock frequency in kilohertz.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE)
 
     @property
     def global_memory_bus_width(self) -> int:
         """
-        Global memory bus width in bits.
+        int: Global memory bus width in bits.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH)
 
     @property
     def l2_cache_size(self) -> int:
         """
-        Size of L2 cache in bytes, 0 if the device doesn't have L2 cache.
+        int: Size of L2 cache in bytes, 0 if the device doesn't have L2 cache.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE)
 
     @property
     def max_threads_per_multiprocessor(self) -> int:
         """
-        Maximum resident threads per multiprocessor.
+        int: Maximum resident threads per multiprocessor.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR)
 
     @property
     def unified_addressing(self) -> bool:
         """
-        True if the device shares a unified address space with the host, False if not.
+        bool: True if the device shares a unified address space with the host, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING))
 
     @property
     def compute_capability_major(self) -> int:
         """
-        Major compute capability version number.
+        int: Major compute capability version number.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR)
 
     @property
     def compute_capability_minor(self) -> int:
         """
-        Minor compute capability version number.
+        int: Minor compute capability version number.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR)
 
     @property
     def global_l1_cache_supported(self) -> bool:
@@ -809,13 +549,7 @@ def global_l1_cache_supported(self) -> bool:
         True if device supports caching globals in L1 cache, False if caching globals in L1 cache is not supported
         by the device.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED))
 
     @property
     def local_l1_cache_supported(self) -> bool:
@@ -823,35 +557,21 @@ def local_l1_cache_supported(self) -> bool:
         True if device supports caching locals in L1 cache, False if caching locals in L1 cache is not supported
         by the device.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED))
 
     @property
     def max_shared_memory_per_multiprocessor(self) -> int:
         """
         Maximum amount of shared memory available to a multiprocessor in bytes.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR)
 
     @property
     def max_registers_per_multiprocessor(self) -> int:
         """
         Maximum number of 32-bit registers available to a multiprocessor.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR)
 
     @property
     def managed_memory(self) -> bool:
@@ -859,46 +579,28 @@ def managed_memory(self) -> bool:
         True if device supports allocating managed memory on this system, False if allocating managed memory is not
         supported by the device on this system.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, self._handle)
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY))
 
     @property
     def multi_gpu_board(self) -> bool:
         """
         True if device is on a multi-GPU board, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, self._handle)
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD))
 
     @property
     def multi_gpu_board_group_id(self) -> int:
         """
         Unique identifier for a group of devices associated with the same board.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID)
 
     @property
     def host_native_atomic_supported(self) -> bool:
         """
         True if Link between the device and the host supports native atomic operations, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED))
 
     @property
     def single_to_double_precision_perf_ratio(self) -> int:
@@ -906,11 +608,7 @@ def single_to_double_precision_perf_ratio(self) -> int:
         Ratio of single precision performance (in floating-point operations per second) to double precision
         performance.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO)
 
     @property
     def pageable_memory_access(self) -> bool:
@@ -918,39 +616,21 @@ def pageable_memory_access(self) -> bool:
         True if device supports coherently accessing pageable memory without calling cudaHostRegister on it,
         False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS))
 
     @property
     def concurrent_managed_access(self) -> bool:
         """
         True if device can coherently access managed memory concurrently with the CPU, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS))
 
     @property
     def compute_preemption_supported(self) -> bool:
         """
         True if device supports Compute Preemption, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED))
 
     @property
     def can_use_host_pointer_for_registered_mem(self) -> bool:
@@ -958,11 +638,7 @@ def can_use_host_pointer_for_registered_mem(self) -> bool:
         True if device can access host registered memory at the same virtual address as the CPU, False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, self._handle
-                )
-            )
+            self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM)
         )
 
     @property
@@ -970,11 +646,7 @@ def max_shared_memory_per_block_optin(self) -> int:
         """
         The maximum per block shared memory size supported on this device.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN)
 
     @property
     def pageable_memory_access_uses_host_page_tables(self) -> bool:
@@ -982,11 +654,8 @@ def pageable_memory_access_uses_host_page_tables(self) -> bool:
         True if device accesses pageable memory via the host's page tables, False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
-                    self._handle,
-                )
+            self._get_attribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES
             )
         )
 
@@ -996,11 +665,7 @@ def direct_managed_mem_access_from_host(self) -> bool:
         True if the host can directly access managed memory on the device without migration, False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST, self._handle
-                )
-            )
+            self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST)
         )
 
     @property
@@ -1010,11 +675,7 @@ def virtual_memory_management_supported(self) -> bool:
         and related APIs, False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, self._handle
-                )
-            )
+            self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED)
         )
 
     @property
@@ -1024,11 +685,8 @@ def handle_type_posix_file_descriptor_supported(self) -> bool:
         False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED,
-                    self._handle,
-                )
+            self._get_attribute(
+                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED
             )
         )
 
@@ -1039,11 +697,7 @@ def handle_type_win32_handle_supported(self) -> bool:
         False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED, self._handle
-                )
-            )
+            self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED)
         )
 
     @property
@@ -1053,11 +707,7 @@ def handle_type_win32_kmt_handle_supported(self) -> bool:
         False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED, self._handle
-                )
-            )
+            self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED)
         )
 
     @property
@@ -1065,46 +715,28 @@ def max_blocks_per_multiprocessor(self) -> int:
         """
         Maximum number of thread blocks that can reside on a multiprocessor.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR)
 
     @property
     def generic_compression_supported(self) -> bool:
         """
         True if device supports compressible memory allocation via cuMemCreate, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED))
 
     @property
     def max_persisting_l2_cache_size(self) -> int:
         """
         Maximum L2 persisting lines capacity setting in bytes.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE)
 
     @property
     def max_access_policy_window_size(self) -> int:
         """
         Maximum value of CUaccessPolicyWindow::num_bytes.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE)
 
     @property
     def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool:
@@ -1112,11 +744,7 @@ def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool:
         True if device supports specifying the GPUDirect RDMA flag with cuMemCreate, False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED, self._handle
-                )
-            )
+            self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED)
         )
 
     @property
@@ -1124,24 +752,14 @@ def reserved_shared_memory_per_block(self) -> int:
         """
         Amount of shared memory per block reserved by CUDA driver in bytes.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK)
 
     @property
     def sparse_cuda_array_supported(self) -> bool:
         """
         True if device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED))
 
     @property
     def read_only_host_register_supported(self) -> bool:
@@ -1150,11 +768,7 @@ def read_only_host_register_supported(self) -> bool:
         memory that must be mapped as read-only to the GPU, False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED, self._handle
-                )
-            )
+            self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED)
         )
 
     @property
@@ -1162,26 +776,14 @@ def memory_pools_supported(self) -> bool:
         """
         True if device supports using the cuMemAllocAsync and cuMemPool family of APIs, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED))
 
     @property
     def gpu_direct_rdma_supported(self) -> bool:
         """
         True if device supports GPUDirect RDMA APIs, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED))
 
     @property
     def gpu_direct_rdma_flush_writes_options(self) -> int:
@@ -1189,11 +791,7 @@ def gpu_direct_rdma_flush_writes_options(self) -> int:
         The returned attribute shall be interpreted as a bitmask, where the individual bits are described by
         the CUflushGPUDirectRDMAWritesOptions enum.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS)
 
     @property
     def gpu_direct_rdma_writes_ordering(self) -> int:
@@ -1201,22 +799,14 @@ def gpu_direct_rdma_writes_ordering(self) -> int:
         GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated
         by the returned attribute.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING)
 
     @property
     def mempool_supported_handle_types(self) -> int:
         """
         Bitmask of handle types supported with mempool based IPC.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(
-                driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES, self._handle
-            )
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES)
 
     @property
     def deferred_mapping_cuda_array_supported(self) -> bool:
@@ -1224,11 +814,7 @@ def deferred_mapping_cuda_array_supported(self) -> bool:
         True if device supports deferred mapping CUDA arrays and CUDA mipmapped arrays, False if not.
         """
         return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED, self._handle
-                )
-            )
+            self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED)
         )
 
     @property
@@ -1236,31 +822,21 @@ def numa_config(self) -> int:
         """
         NUMA configuration of a device.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG)
 
     @property
     def numa_id(self) -> int:
         """
         NUMA node ID of the GPU memory.
         """
-        return handle_return(
-            driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID, self._handle)
-        )
+        return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID)
 
     @property
     def multicast_supported(self) -> bool:
         """
         True if device supports switch multicast and reduction operations, False if not.
         """
-        return bool(
-            handle_return(
-                driver.cuDeviceGetAttribute(
-                    driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED, self._handle
-                )
-            )
-        )
+        return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED))
 
 
 class Device:
diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py
index 30287a38..691fa47c 100644
--- a/cuda_core/tests/test_device.py
+++ b/cuda_core/tests/test_device.py
@@ -198,3 +198,11 @@ def test_compute_capability():
 def test_device_property_types(property_name, expected_type):
     device = Device()
     assert isinstance(getattr(device.properties, property_name), expected_type)
+
+
+def test_device_properties_complete():
+    device = Device()
+    live_props = set(attr for attr in dir(device.properties) if not attr.startswith("_"))
+    tab_props = set(attr for attr, _ in cuda_base_properties)
+    assert len(tab_props) == len(cuda_base_properties)  # Ensure no duplicates.
+    assert tab_props == live_props  # Ensure exact match.