diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py index 747174b2..cfbda8ef 100644 --- a/cuda_core/cuda/core/experimental/_device.py +++ b/cuda_core/cuda/core/experimental/_device.py @@ -14,6 +14,831 @@ _tls_lock = threading.Lock() +class DeviceProperties: + """ + A class to query various attributes of a CUDA device. + + Attributes are read-only and provide information about the device. + """ + + def __init__(self): + raise RuntimeError("DeviceProperties should not be instantiated directly") + + __slots__ = "_handle" + + def _init(handle): + self = DeviceProperties.__new__(DeviceProperties) + self._handle = handle + return self + + def _get_attribute(self, dev_attr): + return handle_return(driver.cuDeviceGetAttribute(dev_attr, self._handle)) + + @property + def max_threads_per_block(self) -> int: + """ + int: Maximum number of threads per block. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK) + + @property + def max_block_dim_x(self) -> int: + """ + int: Maximum x-dimension of a block. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X) + + @property + def max_block_dim_y(self) -> int: + """ + int: Maximum y-dimension of a block. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y) + + @property + def max_block_dim_z(self) -> int: + """ + int: Maximum z-dimension of a block. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z) + + @property + def max_grid_dim_x(self) -> int: + """ + int: Maximum x-dimension of a grid. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X) + + @property + def max_grid_dim_y(self) -> int: + """ + int: Maximum y-dimension of a grid. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y) + + @property + def max_grid_dim_z(self) -> int: + """ + int: Maximum z-dimension of a grid. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z) + + @property + def max_shared_memory_per_block(self) -> int: + """ + int: Maximum amount of shared memory available to a thread block in bytes. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK) + + @property + def total_constant_memory(self) -> int: + """ + int: Memory available on device for __constant__ variables in a CUDA C kernel in bytes. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY) + + @property + def warp_size(self) -> int: + """ + int: Warp size in threads. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE) + + @property + def max_pitch(self) -> int: + """ + int: Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated + through cuMemAllocPitch(). + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH) + + @property + def maximum_texture1d_width(self) -> int: + """ + int: Maximum 1D texture width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH) + + @property + def maximum_texture1d_linear_width(self) -> int: + """ + int: Maximum width for a 1D texture bound to linear memory. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH) + + @property + def maximum_texture1d_mipmapped_width(self) -> int: + """ + int: Maximum mipmapped 1D texture width. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH) + + @property + def maximum_texture2d_width(self) -> int: + """ + int: Maximum 2D texture width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH) + + @property + def maximum_texture2d_height(self) -> int: + """ + int: Maximum 2D texture height. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT) + + @property + def maximum_texture2d_linear_width(self) -> int: + """ + int: Maximum width for a 2D texture bound to linear memory. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH) + + @property + def maximum_texture2d_linear_height(self) -> int: + """ + int: Maximum height for a 2D texture bound to linear memory. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT) + + @property + def maximum_texture2d_linear_pitch(self) -> int: + """ + int: Maximum pitch in bytes for a 2D texture bound to linear memory. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH) + + @property + def maximum_texture2d_mipmapped_width(self) -> int: + """ + int: Maximum mipmapped 2D texture width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH) + + @property + def maximum_texture2d_mipmapped_height(self) -> int: + """ + int: Maximum mipmapped 2D texture height. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT) + + @property + def maximum_texture3d_width(self) -> int: + """ + int: Maximum 3D texture width. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH) + + @property + def maximum_texture3d_height(self) -> int: + """ + int: Maximum 3D texture height. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT) + + @property + def maximum_texture3d_depth(self) -> int: + """ + int: Maximum 3D texture depth. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH) + + @property + def maximum_texture3d_width_alternate(self) -> int: + """ + int: Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE) + + @property + def maximum_texture3d_height_alternate(self) -> int: + """ + int: Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE) + + @property + def maximum_texture3d_depth_alternate(self) -> int: + """ + int: Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE) + + @property + def maximum_texturecubemap_width(self) -> int: + """ + int: Maximum cubemap texture width or height. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH) + + @property + def maximum_texture1d_layered_width(self) -> int: + """ + int: Maximum 1D layered texture width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH) + + @property + def maximum_texture1d_layered_layers(self) -> int: + """ + int: Maximum layers in a 1D layered texture. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS) + + @property + def maximum_texture2d_layered_width(self) -> int: + """ + int: Maximum 2D layered texture width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH) + + @property + def maximum_texture2d_layered_height(self) -> int: + """ + int: Maximum 2D layered texture height. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT) + + @property + def maximum_texture2d_layered_layers(self) -> int: + """ + int: Maximum layers in a 2D layered texture. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS) + + @property + def maximum_texturecubemap_layered_width(self) -> int: + """ + int: Maximum cubemap layered texture width or height. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH) + + @property + def maximum_texturecubemap_layered_layers(self) -> int: + """ + int: Maximum layers in a cubemap layered texture. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS) + + @property + def maximum_surface1d_width(self) -> int: + """ + int: Maximum 1D surface width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH) + + @property + def maximum_surface2d_width(self) -> int: + """ + int: Maximum 2D surface width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH) + + @property + def maximum_surface2d_height(self) -> int: + """ + int: Maximum 2D surface height. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT) + + @property + def maximum_surface3d_width(self) -> int: + """ + int: Maximum 3D surface width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH) + + @property + def maximum_surface3d_height(self) -> int: + """ + int: Maximum 3D surface height. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT) + + @property + def maximum_surface3d_depth(self) -> int: + """ + int: Maximum 3D surface depth. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH) + + @property + def maximum_surface1d_layered_width(self) -> int: + """ + int: Maximum 1D layered surface width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH) + + @property + def maximum_surface1d_layered_layers(self) -> int: + """ + int: Maximum layers in a 1D layered surface. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS) + + @property + def maximum_surface2d_layered_width(self) -> int: + """ + int: Maximum 2D layered surface width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH) + + @property + def maximum_surface2d_layered_height(self) -> int: + """ + int: Maximum 2D layered surface height. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT) + + @property + def maximum_surface2d_layered_layers(self) -> int: + """ + int: Maximum layers in a 2D layered surface. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS) + + @property + def maximum_surfacecubemap_width(self) -> int: + """ + int: Maximum cubemap surface width. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH) + + @property + def maximum_surfacecubemap_layered_width(self) -> int: + """ + int: Maximum cubemap layered surface width. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH) + + @property + def maximum_surfacecubemap_layered_layers(self) -> int: + """ + int: Maximum layers in a cubemap layered surface. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS) + + @property + def max_registers_per_block(self) -> int: + """ + int: Maximum number of 32-bit registers available to a thread block. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK) + + @property + def clock_rate(self) -> int: + """ + int: The typical clock frequency in kilohertz. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE) + + @property + def texture_alignment(self) -> int: + """ + int: Alignment requirement; texture base addresses aligned to textureAlign bytes do not need an offset + applied to texture fetches. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT) + + @property + def texture_pitch_alignment(self) -> int: + """ + int: Pitch alignment requirement for 2D texture references bound to pitched memory. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT) + + @property + def gpu_overlap(self) -> bool: + """ + bool: True if the device can concurrently copy memory between host and device while executing a kernel, + False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP)) + + @property + def multiprocessor_count(self) -> int: + """ + int: Number of multiprocessors on the device. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT) + + @property + def kernel_exec_timeout(self) -> bool: + """ + bool: True if there is a run time limit for kernels executed on the device, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT)) + + @property + def integrated(self) -> bool: + """ + bool: True if the device is integrated with the memory subsystem, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED)) + + @property + def can_map_host_memory(self) -> bool: + """ + bool: True if the device can map host memory into the CUDA address space, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY)) + + @property + def compute_mode(self) -> int: + """ + int: Compute mode that device is currently in. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE) + + @property + def concurrent_kernels(self) -> bool: + """ + bool: True if the device supports executing multiple kernels within the same context simultaneously, + False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS)) + + @property + def ecc_enabled(self) -> bool: + """ + bool: True if error correction is enabled on the device, False if error correction is disabled or not + supported by the device. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED)) + + @property + def pci_bus_id(self) -> int: + """ + int: PCI bus identifier of the device. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID) + + @property + def pci_device_id(self) -> int: + """ + int: PCI device (also known as slot) identifier of the device. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID) + + @property + def pci_domain_id(self) -> int: + """ + int: PCI domain identifier of the device. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID) + + @property + def tcc_driver(self) -> bool: + """ + bool: True if the device is using a TCC driver, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER)) + + @property + def memory_clock_rate(self) -> int: + """ + int: Peak memory clock frequency in kilohertz. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE) + + @property + def global_memory_bus_width(self) -> int: + """ + int: Global memory bus width in bits. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH) + + @property + def l2_cache_size(self) -> int: + """ + int: Size of L2 cache in bytes, 0 if the device doesn't have L2 cache. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE) + + @property + def max_threads_per_multiprocessor(self) -> int: + """ + int: Maximum resident threads per multiprocessor. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR) + + @property + def unified_addressing(self) -> bool: + """ + bool: True if the device shares a unified address space with the host, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING)) + + @property + def compute_capability_major(self) -> int: + """ + int: Major compute capability version number. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) + + @property + def compute_capability_minor(self) -> int: + """ + int: Minor compute capability version number. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR) + + @property + def global_l1_cache_supported(self) -> bool: + """ + True if device supports caching globals in L1 cache, False if caching globals in L1 cache is not supported + by the device. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED)) + + @property + def local_l1_cache_supported(self) -> bool: + """ + True if device supports caching locals in L1 cache, False if caching locals in L1 cache is not supported + by the device. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED)) + + @property + def max_shared_memory_per_multiprocessor(self) -> int: + """ + Maximum amount of shared memory available to a multiprocessor in bytes. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR) + + @property + def max_registers_per_multiprocessor(self) -> int: + """ + Maximum number of 32-bit registers available to a multiprocessor. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR) + + @property + def managed_memory(self) -> bool: + """ + True if device supports allocating managed memory on this system, False if allocating managed memory is not + supported by the device on this system. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY)) + + @property + def multi_gpu_board(self) -> bool: + """ + True if device is on a multi-GPU board, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD)) + + @property + def multi_gpu_board_group_id(self) -> int: + """ + Unique identifier for a group of devices associated with the same board. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID) + + @property + def host_native_atomic_supported(self) -> bool: + """ + True if Link between the device and the host supports native atomic operations, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED)) + + @property + def single_to_double_precision_perf_ratio(self) -> int: + """ + Ratio of single precision performance (in floating-point operations per second) to double precision + performance. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO) + + @property + def pageable_memory_access(self) -> bool: + """ + True if device supports coherently accessing pageable memory without calling cudaHostRegister on it, + False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS)) + + @property + def concurrent_managed_access(self) -> bool: + """ + True if device can coherently access managed memory concurrently with the CPU, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) + + @property + def compute_preemption_supported(self) -> bool: + """ + True if device supports Compute Preemption, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED)) + + @property + def can_use_host_pointer_for_registered_mem(self) -> bool: + """ + True if device can access host registered memory at the same virtual address as the CPU, False if not. + """ + return bool( + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM) + ) + + @property + def max_shared_memory_per_block_optin(self) -> int: + """ + The maximum per block shared memory size supported on this device. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN) + + @property + def pageable_memory_access_uses_host_page_tables(self) -> bool: + """ + True if device accesses pageable memory via the host's page tables, False if not. + """ + return bool( + self._get_attribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES + ) + ) + + @property + def direct_managed_mem_access_from_host(self) -> bool: + """ + True if the host can directly access managed memory on the device without migration, False if not. + """ + return bool( + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST) + ) + + @property + def virtual_memory_management_supported(self) -> bool: + """ + True if device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap + and related APIs, False if not. + """ + return bool( + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED) + ) + + @property + def handle_type_posix_file_descriptor_supported(self) -> bool: + """ + True if device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, + False if not. + """ + return bool( + self._get_attribute( + driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED + ) + ) + + @property + def handle_type_win32_handle_supported(self) -> bool: + """ + True if device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, + False if not. + """ + return bool( + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED) + ) + + @property + def handle_type_win32_kmt_handle_supported(self) -> bool: + """ + True if device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, + False if not. 
+ """ + return bool( + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED) + ) + + @property + def max_blocks_per_multiprocessor(self) -> int: + """ + Maximum number of thread blocks that can reside on a multiprocessor. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR) + + @property + def generic_compression_supported(self) -> bool: + """ + True if device supports compressible memory allocation via cuMemCreate, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED)) + + @property + def max_persisting_l2_cache_size(self) -> int: + """ + Maximum L2 persisting lines capacity setting in bytes. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE) + + @property + def max_access_policy_window_size(self) -> int: + """ + Maximum value of CUaccessPolicyWindow::num_bytes. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE) + + @property + def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool: + """ + True if device supports specifying the GPUDirect RDMA flag with cuMemCreate, False if not. + """ + return bool( + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED) + ) + + @property + def reserved_shared_memory_per_block(self) -> int: + """ + Amount of shared memory per block reserved by CUDA driver in bytes. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK) + + @property + def sparse_cuda_array_supported(self) -> bool: + """ + True if device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, False if not. 
+ """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED)) + + @property + def read_only_host_register_supported(self) -> bool: + """ + True if device supports using the cuMemHostRegister flag CU_MEMHOSTREGISTER_READ_ONLY to register + memory that must be mapped as read-only to the GPU, False if not. + """ + return bool( + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED) + ) + + @property + def memory_pools_supported(self) -> bool: + """ + True if device supports using the cuMemAllocAsync and cuMemPool family of APIs, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED)) + + @property + def gpu_direct_rdma_supported(self) -> bool: + """ + True if device supports GPUDirect RDMA APIs, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED)) + + @property + def gpu_direct_rdma_flush_writes_options(self) -> int: + """ + The returned attribute shall be interpreted as a bitmask, where the individual bits are described by + the CUflushGPUDirectRDMAWritesOptions enum. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS) + + @property + def gpu_direct_rdma_writes_ordering(self) -> int: + """ + GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated + by the returned attribute. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING) + + @property + def mempool_supported_handle_types(self) -> int: + """ + Bitmask of handle types supported with mempool based IPC. 
+ """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES) + + @property + def deferred_mapping_cuda_array_supported(self) -> bool: + """ + True if device supports deferred mapping CUDA arrays and CUDA mipmapped arrays, False if not. + """ + return bool( + self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED) + ) + + @property + def numa_config(self) -> int: + """ + NUMA configuration of a device. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG) + + @property + def numa_id(self) -> int: + """ + NUMA node ID of the GPU memory. + """ + return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID) + + @property + def multicast_supported(self) -> bool: + """ + True if device supports switch multicast and reduction operations, False if not. + """ + return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED)) + + class Device: """Represent a GPU and act as an entry point for cuda.core features. @@ -41,7 +866,7 @@ class Device: """ - __slots__ = ("_id", "_mr", "_has_inited") + __slots__ = ("_id", "_mr", "_has_inited", "_properties") def __new__(cls, device_id=None): # important: creating a Device instance does not initialize the GPU! 
@@ -73,6 +898,7 @@ def __new__(cls, device_id=None): dev._mr = _SynchronousMemoryResource(dev_id) dev._has_inited = False + dev._properties = None _tls.devices.append(dev) return _tls.devices[device_id] @@ -124,10 +950,12 @@ def name(self) -> str: return name.decode() @property - def properties(self) -> dict: - """Return information about the compute-device.""" - # TODO: pythonize the key names - return handle_return(runtime.cudaGetDeviceProperties(self._id)) + def properties(self) -> DeviceProperties: + """Return a :obj:`~_device.DeviceProperties` instance with information about the device.""" + if self._properties is None: + self._properties = DeviceProperties._init(self._id) + + return self._properties @property def compute_capability(self) -> ComputeCapability: diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index f100eb7c..45d32808 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -16,6 +16,7 @@ CUDA runtime _memory.Buffer _stream.Stream _event.Event + _device.DeviceProperties CUDA compilation toolchain diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst index 0a34f825..d3e09872 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.rst +++ b/cuda_core/docs/source/release/0.2.0-notes.rst @@ -9,6 +9,7 @@ Highlights ---------- - Add :class:`~ProgramOptions` to facilitate the passing of runtime compile options to :obj:`~Program`. +- Add :class:`DeviceProperties` to provide pythonic access to device properties. Limitations ----------- @@ -20,3 +21,4 @@ Breaking Changes - Change ``__cuda_stream__`` from attribute to method - The :meth:`~Program.compile` method no longer accepts the `options` argument. Instead, you can optionally pass an instance of :class:`~ProgramOptions` to the constructor of :obj:`~Program`. +- :meth:`~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary. 
diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 876299f3..691fa47c 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -11,6 +11,7 @@ except ImportError: from cuda import cuda as driver from cuda import cudart as runtime +import pytest from cuda.core.experimental import Device from cuda.core.experimental._utils import ComputeCapability, handle_return @@ -78,3 +79,130 @@ def test_compute_capability(): ) expected_cc = ComputeCapability(major, minor) assert device.compute_capability == expected_cc + + +cuda_base_properties = [ + ("max_threads_per_block", int), + ("max_block_dim_x", int), + ("max_block_dim_y", int), + ("max_block_dim_z", int), + ("max_grid_dim_x", int), + ("max_grid_dim_y", int), + ("max_grid_dim_z", int), + ("max_shared_memory_per_block", int), + ("total_constant_memory", int), + ("warp_size", int), + ("max_pitch", int), + ("maximum_texture1d_width", int), + ("maximum_texture1d_linear_width", int), + ("maximum_texture1d_mipmapped_width", int), + ("maximum_texture2d_width", int), + ("maximum_texture2d_height", int), + ("maximum_texture2d_linear_width", int), + ("maximum_texture2d_linear_height", int), + ("maximum_texture2d_linear_pitch", int), + ("maximum_texture2d_mipmapped_width", int), + ("maximum_texture2d_mipmapped_height", int), + ("maximum_texture3d_width", int), + ("maximum_texture3d_height", int), + ("maximum_texture3d_depth", int), + ("maximum_texture3d_width_alternate", int), + ("maximum_texture3d_height_alternate", int), + ("maximum_texture3d_depth_alternate", int), + ("maximum_texturecubemap_width", int), + ("maximum_texture1d_layered_width", int), + ("maximum_texture1d_layered_layers", int), + ("maximum_texture2d_layered_width", int), + ("maximum_texture2d_layered_height", int), + ("maximum_texture2d_layered_layers", int), + ("maximum_texturecubemap_layered_width", int), + ("maximum_texturecubemap_layered_layers", int), + ("maximum_surface1d_width", int), + 
("maximum_surface2d_width", int), + ("maximum_surface2d_height", int), + ("maximum_surface3d_width", int), + ("maximum_surface3d_height", int), + ("maximum_surface3d_depth", int), + ("maximum_surface1d_layered_width", int), + ("maximum_surface1d_layered_layers", int), + ("maximum_surface2d_layered_width", int), + ("maximum_surface2d_layered_height", int), + ("maximum_surface2d_layered_layers", int), + ("maximum_surfacecubemap_width", int), + ("maximum_surfacecubemap_layered_width", int), + ("maximum_surfacecubemap_layered_layers", int), + ("max_registers_per_block", int), + ("clock_rate", int), + ("texture_alignment", int), + ("texture_pitch_alignment", int), + ("gpu_overlap", bool), + ("multiprocessor_count", int), + ("kernel_exec_timeout", bool), + ("integrated", bool), + ("can_map_host_memory", bool), + ("compute_mode", int), + ("concurrent_kernels", bool), + ("ecc_enabled", bool), + ("pci_bus_id", int), + ("pci_device_id", int), + ("pci_domain_id", int), + ("tcc_driver", bool), + ("memory_clock_rate", int), + ("global_memory_bus_width", int), + ("l2_cache_size", int), + ("max_threads_per_multiprocessor", int), + ("unified_addressing", bool), + ("compute_capability_major", int), + ("compute_capability_minor", int), + ("global_l1_cache_supported", bool), + ("local_l1_cache_supported", bool), + ("max_shared_memory_per_multiprocessor", int), + ("max_registers_per_multiprocessor", int), + ("managed_memory", bool), + ("multi_gpu_board", bool), + ("multi_gpu_board_group_id", int), + ("host_native_atomic_supported", bool), + ("single_to_double_precision_perf_ratio", int), + ("pageable_memory_access", bool), + ("concurrent_managed_access", bool), + ("compute_preemption_supported", bool), + ("can_use_host_pointer_for_registered_mem", bool), + ("max_shared_memory_per_block_optin", int), + ("pageable_memory_access_uses_host_page_tables", bool), + ("direct_managed_mem_access_from_host", bool), + ("virtual_memory_management_supported", bool), + 
("handle_type_posix_file_descriptor_supported", bool), + ("handle_type_win32_handle_supported", bool), + ("handle_type_win32_kmt_handle_supported", bool), + ("max_blocks_per_multiprocessor", int), + ("generic_compression_supported", bool), + ("max_persisting_l2_cache_size", int), + ("max_access_policy_window_size", int), + ("gpu_direct_rdma_with_cuda_vmm_supported", bool), + ("reserved_shared_memory_per_block", int), + ("sparse_cuda_array_supported", bool), + ("read_only_host_register_supported", bool), + ("memory_pools_supported", bool), + ("gpu_direct_rdma_supported", bool), + ("gpu_direct_rdma_flush_writes_options", int), + ("gpu_direct_rdma_writes_ordering", int), + ("mempool_supported_handle_types", int), + ("deferred_mapping_cuda_array_supported", bool), + ("numa_config", int), + ("numa_id", int), + ("multicast_supported", bool), +] + + +@pytest.mark.parametrize("property_name, expected_type", cuda_base_properties) +def test_device_property_types(property_name, expected_type): + device = Device() + assert isinstance(getattr(device.properties, property_name), expected_type) + + +def test_device_properties_complete(): + device = Device() + live_props = set(attr for attr in dir(device.properties) if not attr.startswith("_")) + tab_props = set(attr for attr, _ in cuda_base_properties) + assert len(tab_props) == len(cuda_base_properties) # Ensure no duplicates. + assert tab_props == live_props # Ensure exact match.